diff --git a/.fernignore b/.fernignore
index c8666c2..d85561f 100644
--- a/.fernignore
+++ b/.fernignore
@@ -9,3 +9,8 @@ src/agora_agent/agentkit/
 
 # Documentation - managed manually, not generated by Fern
 docs/
+
+# Dependency manifests/lockfiles are managed manually
+pyproject.toml
+poetry.lock
+requirements.txt
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 16caff3..f46ffcf 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -15,8 +15,6 @@ jobs:
           curl -sSL https://install.python-poetry.org | python - -y --version 1.5.1
       - name: Install dependencies
         run: poetry install
-      - name: Validate docs
-        run: poetry run python scripts/validate_docs.py
       - name: Compile
         run: poetry run mypy .
   test:
@@ -33,8 +31,30 @@ jobs:
           curl -sSL https://install.python-poetry.org | python - -y --version 1.5.1
       - name: Install dependencies
         run: poetry install
-      - name: Validate docs
-        run: poetry run python scripts/validate_docs.py
 
       - name: Test
         run: poetry run pytest -rP .
+
+  publish:  # Builds the package and uploads it to PyPI; gated to pushes whose ref starts with refs/tags/.
+    needs: [compile, test]  # Publish only after the compile (mypy) and test (pytest) jobs succeed.
+    if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/')
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout repo
+        uses: actions/checkout@v4
+      - name: Set up python
+        uses: actions/setup-python@v4
+        with:
+          python-version: "3.8"  # Quoted so YAML keeps the version a string rather than a float.
+      - name: Bootstrap poetry
+        run: |
+          curl -sSL https://install.python-poetry.org | python - -y --version 1.5.1
+      - name: Install dependencies
+        run: poetry install
+      - name: Publish to pypi
+        run: |
+          poetry config repositories.remote https://upload.pypi.org/legacy/
+          poetry --no-interaction -v publish --build --repository remote --username "$PYPI_USERNAME" --password "$PYPI_PASSWORD"
+        env:  # Credentials come from repository secrets and reach the shell only as environment variables.
+          PYPI_USERNAME: ${{ secrets.PYPI_USERNAME }}
+          PYPI_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
deleted file mode 100644
index 58ef110..0000000
--- a/.github/workflows/release.yml
+++ /dev/null
@@ -1,76 +0,0 @@
-name: release
-
-on:
-  push:
-    tags:
-      - "v*"
-  workflow_dispatch:
-    inputs:
-      tag_name:
-        description: "Tag to publish"
-        required: false
-        type: string
-
-jobs:
-  release:
-    runs-on: ubuntu-latest
-    permissions:
-      contents: write
-      id-token: write
-    steps:
-      - name: Checkout repo
-        uses: actions/checkout@v4
-
-      - name: Set up python
-        uses: actions/setup-python@v4
-        with:
-          python-version: 3.8
-
-      - name: Bootstrap poetry
-        run: |
-          curl -sSL https://install.python-poetry.org | python - -y --version 1.5.1
-
-      - name: Install dependencies
-        run: poetry install
-
-      - name: Compile
-        run: poetry run mypy .
-
-      - name: Test
-        run: poetry run pytest -rP .
-
-      - name: Build
-        run: poetry build
-
-      - name: Extract changelog notes
-        id: changelog
-        env:
-          INPUT_TAG_NAME: ${{ github.event.inputs.tag_name }}
-        run: |
-          VERSION="${INPUT_TAG_NAME:-${GITHUB_REF_NAME}}"
-          NOTES=$(awk -v ver="## [${VERSION}]" '
-            index($0, ver) == 1 { found=1; next }
-            found && /^## / { exit }
-            found { print }
-          ' changelog.md)
-          echo "notes<<EOF" >> "$GITHUB_OUTPUT"
-          echo "$NOTES" >> "$GITHUB_OUTPUT"
-          echo "EOF" >> "$GITHUB_OUTPUT"
-
-      - name: Create GitHub Release
-        env:
-          GH_TOKEN: ${{ github.token }}
-          NOTES: ${{ steps.changelog.outputs.notes }}
-          INPUT_TAG_NAME: ${{ github.event.inputs.tag_name }}
-        run: |
-          VERSION="${INPUT_TAG_NAME:-${GITHUB_REF_NAME}}"
-          echo "$NOTES" > release_notes.md
-          gh release create "$VERSION" \
-            --title "$VERSION" \
-            --notes-file release_notes.md \
-            dist/*
-
-      - name: Publish to PyPI
-        uses: pypa/gh-action-pypi-publish@release/v1
-        with:
-          password: ${{ secrets.PYPI_API_TOKEN }}
diff --git a/README.md b/README.md
index 4c24c8b..f0ecfe5 100644
--- a/README.md
+++ b/README.md
@@ -1,9 +1,33 @@
-# Agora Agent Server SDK for Python
+# Agoraio Python Library
 
 [![fern shield](https://img.shields.io/badge/%F0%9F%8C%BF-Built%20with%20Fern-brightgreen)](https://buildwithfern.com?utm_source=github&utm_medium=github&utm_campaign=readme&utm_source=https%3A%2F%2Fgithub.com%2FAgoraIO-Conversational-AI%2Fagent-server-sdk-python)
 [![pypi](https://img.shields.io/pypi/v/agora-agent-server-sdk)](https://pypi.python.org/pypi/agora-agent-server-sdk)
 
-The Agora Agent Server SDK for Python lets you build real-time voice agents on Agora Conversational AI with a high-level `Agent` / `AgentSession` API and a generated low-level REST client.
+The Agora Conversational AI SDK provides convenient access to the Agora Conversational AI APIs,
+enabling you to build voice-powered AI agents with support for both cascading flows (ASR -> LLM -> TTS)
+and multimodal flows (MLLM) for real-time audio processing.
+
+
+## Table of Contents
+
+- [Requirements](#requirements)
+- [Installation](#installation)
+- [Quick Start](#quick-start)
+- [BYOK](#byok)
+- [MLLM Realtime Multimodal](#mllm-realtime-multimodal)
+- [Documentation](#documentation)
+- [Reference](#reference)
+- [MLLM Flow (Multimodal)](#mllm-flow-multimodal)
+- [Usage](#usage)
+- [Async Client](#async-client)
+- [Exception Handling](#exception-handling)
+- [Pagination](#pagination)
+- [Advanced](#advanced)
+  - [Access Raw Response Data](#access-raw-response-data)
+  - [Retries](#retries)
+  - [Timeouts](#timeouts)
+  - [Custom Client](#custom-client)
+- [Contributing](#contributing)
 
 ## Requirements
 
@@ -183,14 +207,323 @@ See the [MLLM Flow guide](./docs/guides/mllm-flow.md) for full examples with Gem
 
 ## Documentation
 
-- [Overview](./docs/index.md)
-- [Authentication](./docs/getting-started/authentication.md)
-- [Quick Start](./docs/getting-started/quick-start.md)
-- [BYOK Guide](./docs/guides/byok.md)
-- [MLLM Flow](./docs/guides/mllm-flow.md)
-- [Low-Level API](./docs/guides/low-level-api.md)
+API reference documentation is available [here](https://docs.agora.io/en/conversational-ai/overview).
 
 ## Reference
 
-- [SDK Reference](./reference.md)
-- [Agora Conversational AI Docs](https://docs.agora.io/en/conversational-ai/overview)
+A full reference for this library is available [here](https://github.com/AgoraIO-Conversational-AI/agent-server-sdk-python/blob/HEAD/reference.md).
+
+## MLLM Flow (Multimodal)
+
+For real-time audio processing using OpenAI's Realtime API or Google Gemini Live, use the MLLM (Multimodal Large Language Model) flow instead of the cascading ASR -> LLM -> TTS flow. See the [MLLM Overview](https://docs.agora.io/en/conversational-ai/models/mllm/overview) for more details.
+
+```python
+from agora_agent import Agora
+from agora_agent.agents import (
+    StartAgentsRequestProperties,
+    StartAgentsRequestPropertiesAdvancedFeatures,
+    StartAgentsRequestPropertiesMllm,
+    StartAgentsRequestPropertiesMllmVendor,
+    StartAgentsRequestPropertiesTts,
+    StartAgentsRequestPropertiesTtsVendor,
+    StartAgentsRequestPropertiesLlm,
+    StartAgentsRequestPropertiesTurnDetection,
+    StartAgentsRequestPropertiesTurnDetectionType,
+)
+
+client = Agora(
+    customer_id="YOUR_CUSTOMER_ID",
+    customer_secret="YOUR_CUSTOMER_SECRET",
+)
+
+client.agents.start(
+    appid="your_app_id",
+    name="mllm_agent",
+    properties=StartAgentsRequestProperties(
+        channel="channel_name",
+        token="your_token",
+        agent_rtc_uid="1001",
+        remote_rtc_uids=["1002"],
+        idle_timeout=120,
+        advanced_features=StartAgentsRequestPropertiesAdvancedFeatures(
+            enable_mllm=True,
+        ),
+        mllm=StartAgentsRequestPropertiesMllm(
+            url="wss://api.openai.com/v1/realtime",
+            api_key="<your_openai_api_key>",
+            vendor=StartAgentsRequestPropertiesMllmVendor.OPENAI,
+            params={
+                "model": "gpt-4o-realtime-preview",
+                "voice": "alloy",
+            },
+            input_modalities=["audio"],
+            output_modalities=["text", "audio"],
+            greeting_message="Hello! I'm ready to chat in real-time.",
+        ),
+        turn_detection=StartAgentsRequestPropertiesTurnDetection(
+            type=StartAgentsRequestPropertiesTurnDetectionType.SERVER_VAD,
+            threshold=0.5,
+            silence_duration_ms=500,
+        ),
+        # TTS and LLM are still required but not used when MLLM is enabled
+        tts=StartAgentsRequestPropertiesTts(
+            vendor=StartAgentsRequestPropertiesTtsVendor.MICROSOFT,
+            params={},
+        ),
+        llm=StartAgentsRequestPropertiesLlm(
+            url="https://api.openai.com/v1/chat/completions",
+        ),
+    ),
+)
+```
+
+
+## Usage
+
+Instantiate and use the client with the following:
+
+```python
+from agora_agent import Agora, MicrosoftTtsParams, Tts_Microsoft
+from agora_agent.agents import (
+    StartAgentsRequestProperties,
+    StartAgentsRequestPropertiesAsr,
+    StartAgentsRequestPropertiesLlm,
+)
+
+client = Agora(
+    authorization="YOUR_AUTHORIZATION",
+    username="YOUR_USERNAME",
+    password="YOUR_PASSWORD",
+)
+client.agents.start(
+    appid="appid",
+    name="unique_name",
+    properties=StartAgentsRequestProperties(
+        channel="channel_name",
+        token="token",
+        agent_rtc_uid="1001",
+        remote_rtc_uids=["1002"],
+        idle_timeout=120,
+        asr=StartAgentsRequestPropertiesAsr(
+            language="en-US",
+        ),
+        tts=Tts_Microsoft(
+            params=MicrosoftTtsParams(
+                key="key",
+                region="region",
+                voice_name="voice_name",
+            ),
+        ),
+        llm=StartAgentsRequestPropertiesLlm(
+            url="https://api.openai.com/v1/chat/completions",
+            api_key="<your_llm_key>",
+            system_messages=[
+                {"role": "system", "content": "You are a helpful chatbot."}
+            ],
+            params={"model": "gpt-4o-mini"},
+            max_history=32,
+            greeting_message="Hello, how can I assist you today?",
+            failure_message="Please hold on a second.",
+        ),
+    ),
+)
+```
+
+## Async Client
+
+The SDK also exports an `async` client so that you can make non-blocking calls to our API. Note that if you are constructing an Async httpx client class to pass into this client, use `httpx.AsyncClient()` instead of `httpx.Client()` (e.g. for the `httpx_client` parameter of this client).
+
+```python
+import asyncio
+
+from agora_agent import AsyncAgora, MicrosoftTtsParams, Tts_Microsoft
+from agora_agent.agents import (
+    StartAgentsRequestProperties,
+    StartAgentsRequestPropertiesAsr,
+    StartAgentsRequestPropertiesLlm,
+)
+
+client = AsyncAgora(
+    authorization="YOUR_AUTHORIZATION",
+    username="YOUR_USERNAME",
+    password="YOUR_PASSWORD",
+)
+
+
+async def main() -> None:
+    await client.agents.start(
+        appid="appid",
+        name="unique_name",
+        properties=StartAgentsRequestProperties(
+            channel="channel_name",
+            token="token",
+            agent_rtc_uid="1001",
+            remote_rtc_uids=["1002"],
+            idle_timeout=120,
+            asr=StartAgentsRequestPropertiesAsr(
+                language="en-US",
+            ),
+            tts=Tts_Microsoft(
+                params=MicrosoftTtsParams(
+                    key="key",
+                    region="region",
+                    voice_name="voice_name",
+                ),
+            ),
+            llm=StartAgentsRequestPropertiesLlm(
+                url="https://api.openai.com/v1/chat/completions",
+                api_key="<your_llm_key>",
+                system_messages=[
+                    {"role": "system", "content": "You are a helpful chatbot."}
+                ],
+                params={"model": "gpt-4o-mini"},
+                max_history=32,
+                greeting_message="Hello, how can I assist you today?",
+                failure_message="Please hold on a second.",
+            ),
+        ),
+    )
+
+
+asyncio.run(main())
+```
+
+## Exception Handling
+
+When the API returns a non-success status code (4xx or 5xx response), a subclass of the following error
+will be raised.
+
+```python
+from agora_agent.core.api_error import ApiError
+
+try:
+    client.agents.start(...)
+except ApiError as e:
+    print(e.status_code)
+    print(e.body)
+```
+
+## Pagination
+
+Paginated requests will return a `SyncPager` or `AsyncPager`, which can be used as generators for the underlying object.
+
+```python
+from agora_agent import Agora
+
+client = Agora(
+    authorization="YOUR_AUTHORIZATION",
+    username="YOUR_USERNAME",
+    password="YOUR_PASSWORD",
+)
+response = client.agents.list(
+    appid="appid",
+)
+for item in response:
+    print(item)
+# alternatively, you can paginate page-by-page
+for page in response.iter_pages():
+    print(page)
+```
+
+```python
+# You can also iterate through pages and access the typed response per page
+pager = client.agents.list(...)
+for page in pager.iter_pages():
+    print(page.response)  # access the typed response for each page
+    for item in page:
+        print(item)
+```
+
+## Advanced
+
+### Access Raw Response Data
+
+The SDK provides access to raw response data, including headers, through the `.with_raw_response` property.
+The `.with_raw_response` property returns a "raw" client that can be used to access the `.headers` and `.data` attributes.
+
+```python
+from agora_agent import Agora
+
+client = Agora(
+    ...,
+)
+response = client.agents.with_raw_response.start(...)
+print(response.headers)  # access the response headers
+print(response.data)  # access the underlying object
+pager = client.agents.list(...)
+print(pager.response)  # access the typed response for the first page
+for item in pager:
+    print(item)  # access the underlying object(s)
+for page in pager.iter_pages():
+    print(page.response)  # access the typed response for each page
+    for item in page:
+        print(item)  # access the underlying object(s)
+```
+
+### Retries
+
+The SDK is instrumented with automatic retries with exponential backoff. A request will be retried as long
+as the request is deemed retryable and the number of retry attempts has not grown larger than the configured
+retry limit (default: 2).
+
+A request is deemed retryable when any of the following HTTP status codes is returned:
+
+- [408](https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/408) (Request Timeout)
+- [429](https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/429) (Too Many Requests)
+- [5XX](https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/500) (Server Errors)
+
+Use the `max_retries` request option to configure this behavior.
+
+```python
+client.agents.start(..., request_options={
+    "max_retries": 1
+})
+```
+
+### Timeouts
+
+The SDK defaults to a 60 second timeout. You can configure this with a timeout option at the client or request level.
+
+```python
+
+from agora_agent import Agora
+
+client = Agora(
+    ...,
+    timeout=20.0,
+)
+
+
+# Override timeout for a specific method
+client.agents.start(..., request_options={
+    "timeout_in_seconds": 1
+})
+```
+
+### Custom Client
+
+You can override the `httpx` client to customize it for your use-case. Some common use-cases include support for proxies
+and transports.
+
+```python
+import httpx
+from agora_agent import Agora
+
+client = Agora(
+    ...,
+    httpx_client=httpx.Client(
+        proxy="http://my.test.proxy.example.com",
+        transport=httpx.HTTPTransport(local_address="0.0.0.0"),
+    ),
+)
+```
+
+## Contributing
+
+While we value open-source contributions to this SDK, this library is generated programmatically.
+Additions made directly to this library would have to be moved over to our generation code,
+otherwise they would be overwritten upon the next generated release. Feel free to open a PR as
+a proof of concept, but know that we will not be able to merge it as-is. We suggest opening
+an issue first to discuss with us!
+
+On the other hand, contributions to the README are always very welcome!
diff --git a/changelog.md b/changelog.md
index 1ecf085..dac6d62 100644
--- a/changelog.md
+++ b/changelog.md
@@ -4,6 +4,27 @@ All notable changes to this project will be documented in this file.
 
 The format is based on [Keep a Changelog](https://keepachangelog.com/).
 
+## [v1.4.0] — 2026-05-13
+
+### Added
+
+- **`DeepgramTTS`** — New TTS vendor wrapper for Deepgram (Beta). Accepts `api_key`, `model`, `base_url`, `sample_rate`, `params`, and `skip_patterns`.
+- **`Agent.with_tools(enabled=True)`** — Dedicated builder method to enable MCP tool invocation (`advanced_features.enable_tools`). Replaces the raw `with_advanced_features(AdvancedFeatures(enable_tools=True))` call.
+- **LLM vendors: `headers` field** — All four LLM vendors (`OpenAI`, `AzureOpenAI`, `Anthropic`, `Gemini`) now accept an optional `headers: Dict[str, str]` parameter. Use this to pass custom HTTP headers to the LLM provider (e.g., tenant identifiers, routing headers).
+- **`AgentSession.think()` / `AsyncAgentSession.think()`** — Send a custom instruction to a running agent through the `agent_management` API.
+- **`Agent.with_interruption()`** — Configure the new top-level `interruption` object for unified interruption control.
+- **MLLM turn detection** — `OpenAIRealtime`, `GeminiLive`, and `VertexAI` now accept `turn_detection`, which maps to `mllm.turn_detection` and overrides top-level turn detection for MLLM sessions.
+- **`audio_scenario` AgentKit support** — `SessionParams` and AgentKit request construction now expose the top-level `parameters.audio_scenario` field.
+- **MLLM vendor parity** — `GeminiLive` is documented and exposed as the direct Google Gemini Live API wrapper.
+
+### Fixed
+
+- **MiniMax TTS preset stripping** — When a MiniMax reseller preset is inferred (`minimax_speech_2_6_turbo` or `minimax_speech_2_8_turbo`), the `group_id` and `url` fields are now correctly stripped from `tts.params` alongside `key` and `model`. Previously they were forwarded to the API, causing request failures.
+- **MLLM enable flag** — `Agent.with_mllm()` now sets `mllm.enable = True` and removes the deprecated `advanced_features.enable_mllm` flag from generated requests.
+- **MLLM wrapper shape** — MLLM vendors no longer emit removed fields such as `style`; docs and tests now reflect the v2.6 MLLM contract.
+- **Preset-backed OpenAI TTS** — `OpenAITTS` no longer requires `api_key` when a reseller preset supplies credentials server-side.
+- **AgentKit parity coverage** — Added regression coverage for interruption, MLLM turn detection, Deepgram TTS, LLM headers, and deprecated MLLM flag cleanup.
+
 ## [v1.3.0] — 2026-04-02
 
 ### Added
diff --git a/docs/concepts/agent.md b/docs/concepts/agent.md
index a8b80ad..0a8b159 100644
--- a/docs/concepts/agent.md
+++ b/docs/concepts/agent.md
@@ -32,7 +32,7 @@ agent = Agent(
 | `max_history` | `int` | No | Maximum conversation history length |
 | `turn_detection` | `TurnDetectionConfig` | No | Turn detection settings |
 | `sal` | `SalConfig` | No | SAL (Speech Activity Level) configuration |
-| `advanced_features` | `Dict[str, Any]` | No | Advanced features (e.g., `{'enable_mllm': True}`) |
+| `advanced_features` | `Dict[str, Any]` | No | Advanced features (e.g., `{'enable_rtm': True}`) |
 | `parameters` | `SessionParams` | No | Additional session parameters |
 | `geofence` | `GeofenceConfig` | No | Regional access restriction |
 | `labels` | `Dict[str, str]` | No | Custom key-value labels (returned in callbacks) |
@@ -60,7 +60,7 @@ Each `with_*` method returns a **new** `Agent` instance — the original is unch
 | `with_instructions(text)` | `str` | Override the system prompt |
 | `with_greeting(text)` | `str` | Override the greeting message |
 | `with_name(name)` | `str` | Override the agent name |
-| `with_turn_detection(config)` | `TurnDetectionConfig` | Override turn detection (use `config.start_of_speech` / `config.end_of_speech` for SOS/EOS) |
+| `with_turn_detection(config)` | `TurnDetectionConfig` | Override cascading-flow SOS/EOS detection; use `with_interruption()` for interruption behavior |
 | `with_sal(config)` | `SalConfig` | Set SAL configuration |
 | `with_advanced_features(features)` | `Dict[str, Any]` | Set advanced features |
 | `with_parameters(parameters)` | `SessionParams` | Set session parameters |
diff --git a/docs/concepts/vendors.md b/docs/concepts/vendors.md
index 9fb02c1..7f22d8e 100644
--- a/docs/concepts/vendors.md
+++ b/docs/concepts/vendors.md
@@ -12,7 +12,7 @@ All vendor classes are available from `agora_agent.agentkit.vendors`:
 
 <!-- snippet: executable -->
 ```python
-from agora_agent.agentkit.vendors import OpenAI, ElevenLabsTTS, DeepgramSTT
+from agora_agent.agentkit.vendors import OpenAI, ElevenLabsTTS, DeepgramTTS, DeepgramSTT
 ```
 
 ## LLM Vendors
@@ -50,6 +50,7 @@ Used with `agent.with_tts()`. Each TTS vendor produces audio at a specific sampl
 | `FishAudioTTS` | Fish Audio | `key`, `reference_id` | — |
 | `GroqTTS` | Groq | `key` | — |
 | `MiniMaxTTS` | MiniMax | `key` | — |
+| `DeepgramTTS` | Deepgram | `api_key`, `model` | Configurable |
 | `SarvamTTS` | Sarvam | `api_key` | — |
 
 <!-- snippet: executable -->
@@ -78,7 +79,6 @@ Used with `agent.with_stt()`.
 | `AmazonSTT` | Amazon Transcribe | `access_key`, `secret_key`, `region` |
 | `AssemblyAISTT` | AssemblyAI | `api_key` |
 | `AresSTT` | Ares | — (all optional) |
-| `SonioxSTT` | Soniox | `api_key`, `language` |
 | `SarvamSTT` | Sarvam | `api_key`, `language` |
 
 <!-- snippet: executable -->
@@ -94,8 +94,9 @@ Used with `agent.with_mllm()` for the [MLLM flow](../guides/mllm-flow.md). These
 
 | Class | Provider | Required Parameters |
 |---|---|---|
-| `OpenAIRealtime` | OpenAI Realtime | `api_key` |
-| `VertexAI` | Vertex AI (Gemini Live) | `model`, `project_id`, `location`, `adc_credentials_string` |
+| `OpenAIRealtime` | OpenAI Realtime | `api_key`; optional `turn_detection` |
+| `GeminiLive` | Google Gemini Live API | `api_key`, `model`; optional `turn_detection` |
+| `VertexAI` | Vertex AI (Gemini Live) | `model`, `project_id`, `location`, `adc_credentials_string`; optional `turn_detection` |
 
 <!-- snippet: executable -->
 ```python
diff --git a/docs/guides/agent-builder-features.md b/docs/guides/agent-builder-features.md
index 07e3f24..16efad6 100644
--- a/docs/guides/agent-builder-features.md
+++ b/docs/guides/agent-builder-features.md
@@ -16,6 +16,7 @@ For string values with a finite set of options (e.g. `data_channel`, `sal_mode`,
 |---|---|---|
 | `sal` | `with_sal(config)` | Selective Attention Locking — speaker recognition and noise suppression |
 | `advanced_features` | `with_advanced_features(features)` | Enable MLLM, RTM, SAL, tools |
+| `tools` | `with_tools(enabled=True)` | Enable MCP tool invocation |
 | `parameters` | `with_parameters(params)` | Silence config, farewell config, data channel |
 | `failure_message` | `with_failure_message(msg)` | Message spoken when LLM fails |
 | `max_history` | `with_max_history(n)` | Max conversation turns in LLM context |
@@ -60,13 +61,13 @@ from agora_agent.agentkit import Agent, AdvancedFeatures
 from agora_agent.agentkit.vendors import OpenAIRealtime
 
 # MLLM mode (see mllm-flow guide)
-agent = Agent(advanced_features=AdvancedFeatures(enable_mllm=True)).with_mllm(OpenAIRealtime(api_key='...'))
+agent = Agent().with_mllm(OpenAIRealtime(api_key='...'))
 
 # RTM signaling for custom data delivery
 agent = Agent(advanced_features=AdvancedFeatures(enable_rtm=True))
 
 # Enable tool invocation via MCP
-agent = Agent(advanced_features=AdvancedFeatures(enable_tools=True))
+agent = Agent().with_tools()
 ```
 
 ## Session Parameters
@@ -340,5 +341,5 @@ agent_id = session.start()
 
 - [Agent Reference](../reference/agent.md) — full API signatures
 - [Cascading Flow](./cascading-flow.md) — ASR → LLM → TTS setup
-- [MLLM Flow](./mllm-flow.md) — multimodal flow with `enable_mllm`
+- [MLLM Flow](./mllm-flow.md) — multimodal flow with `mllm.enable`
 - [Regional Routing](./regional-routing.md) — client area and geofence
diff --git a/docs/guides/low-level-api.md b/docs/guides/low-level-api.md
index 1ac5e05..6677b45 100644
--- a/docs/guides/low-level-api.md
+++ b/docs/guides/low-level-api.md
@@ -8,6 +8,13 @@ description: Direct client.agents.start() usage without the builder pattern.
 
 For full control over request payloads you can call the generated clients directly and pass raw types such as `StartAgentsRequestProperties`, `Tts_Elevenlabs`, and `StartAgentsRequestPropertiesAsr`. Use this when you need vendor or options not exposed by the agentkit, or when integrating with generated types from the API spec.
 
+## Raw telephony and phone-number APIs
+
+AgentKit focuses on realtime agent session helpers. Telephony call status, call hangup, and phone-number management are exposed through the generated low-level clients:
+
+- `client.telephony` for call status and hangup operations
+- `client.phone_numbers` for phone-number list, create, retrieve, update, and delete operations
+
 ## Cascading flow (ASR → LLM → TTS)
 
 ```python
@@ -129,11 +136,6 @@ For real-time audio with OpenAI Realtime or Google Gemini Live, use the MLLM flo
 
 ```python
 from agora_agent import Agora, Area
-from agora_agent.agentkit import (
-    AdvancedFeatures,
-    TurnDetectionConfig,
-    TurnDetectionTypeValues,
-)
 from agora_agent.agents import (
     StartAgentsRequestProperties,
     StartAgentsRequestPropertiesMllm,
@@ -159,8 +161,8 @@ client.agents.start(
         agent_rtc_uid="1001",
         remote_rtc_uids=["1002"],
         idle_timeout=120,
-        advanced_features=AdvancedFeatures(enable_mllm=True),
         mllm=StartAgentsRequestPropertiesMllm(
+            enable=True,
             url="wss://api.openai.com/v1/realtime",
             api_key="<your_openai_api_key>",
             vendor=StartAgentsRequestPropertiesMllmVendor.OPENAI,
@@ -171,18 +173,12 @@ client.agents.start(
             input_modalities=["audio"],
             output_modalities=["text", "audio"],
             greeting_message="Hello! I'm ready to chat in real-time.",
-        ),
-        turn_detection=TurnDetectionConfig(
-            type=TurnDetectionTypeValues.SERVER_VAD,  # deprecated; use config.end_of_speech instead
-            threshold=0.5,
-            silence_duration_ms=500,
-        ),
-        tts=StartAgentsRequestPropertiesTts(
-            vendor=StartAgentsRequestPropertiesTtsVendor.ELEVENLABS,
-            params={},
-        ),
-        llm=StartAgentsRequestPropertiesLlm(
-            url="https://api.openai.com/v1/chat/completions",
+            turn_detection={
+                "mode": "server_vad",
+                "server_vad_config": {
+                    "idle_timeout_ms": 5000,
+                },
+            },
         ),
     ),
 )
diff --git a/docs/guides/mllm-flow.md b/docs/guides/mllm-flow.md
index 31fa34d..15d7c3e 100644
--- a/docs/guides/mllm-flow.md
+++ b/docs/guides/mllm-flow.md
@@ -13,29 +13,26 @@ Two MLLM vendors are supported:
 - **OpenAI Realtime** — `gpt-4o-realtime-preview` and related models
 - **Gemini Live** — direct Google AI API access for audio-native Gemini models
 
-## Required: Enable MLLM Mode
+## Enable MLLM Mode
 
-MLLM mode must be explicitly enabled via `advanced_features`:
+Call `agent.with_mllm(vendor)` to enable MLLM mode. The builder sets `mllm.enable = True` automatically.
 
 ```python
-from agora_agent.agentkit import Agent, AdvancedFeatures
+from agora_agent.agentkit import Agent
 
 agent = Agent(
     name='realtime-agent',
     instructions='You are a voice assistant.',
-    advanced_features=AdvancedFeatures(enable_mllm=True),
 )
 ```
 
-Without `AdvancedFeatures(enable_mllm=True)`, the SDK treats the session as a cascading flow and requires LLM + TTS vendors.
-
 ## OpenAI Realtime
 
 ### Sync
 
 ```python
 from agora_agent import Agora, Area
-from agora_agent.agentkit import Agent, AdvancedFeatures
+from agora_agent.agentkit import Agent
 from agora_agent.agentkit.vendors import OpenAIRealtime
 
 client = Agora(
@@ -48,7 +45,6 @@ agent = (
     Agent(
         name='realtime-agent',
         instructions='You are a helpful voice assistant.',
-        advanced_features=AdvancedFeatures(enable_mllm=True),
     )
     .with_mllm(OpenAIRealtime(
         api_key='your-openai-key',
@@ -67,7 +63,7 @@ session.stop()
 ```python
 import asyncio
 from agora_agent import AsyncAgora, Area
-from agora_agent.agentkit import Agent, AdvancedFeatures
+from agora_agent.agentkit import Agent
 from agora_agent.agentkit.vendors import OpenAIRealtime
 
 async def main():
@@ -81,7 +77,6 @@ async def main():
         Agent(
             name='realtime-agent',
             instructions='You are a helpful voice assistant.',
-            advanced_features=AdvancedFeatures(enable_mllm=True),
         )
         .with_mllm(OpenAIRealtime(
             api_key='your-openai-key',
@@ -102,7 +97,7 @@ Gemini Live uses a Google AI API key:
 
 ```python
 from agora_agent import Agora, Area
-from agora_agent.agentkit import Agent, AdvancedFeatures
+from agora_agent.agentkit import Agent
 from agora_agent.agentkit.vendors import GeminiLive
 
 client = Agora(
@@ -115,7 +110,6 @@ agent = (
     Agent(
         name='gemini-agent',
         instructions='You are a helpful multilingual assistant.',
-        advanced_features=AdvancedFeatures(enable_mllm=True),
     )
     .with_mllm(GeminiLive(
         api_key='your-google-ai-api-key',
diff --git a/docs/reference/agent.md b/docs/reference/agent.md
index 87a3b16..0e7f5e8 100644
--- a/docs/reference/agent.md
+++ b/docs/reference/agent.md
@@ -16,6 +16,7 @@ Agent(
     name: Optional[str] = None,
     instructions: Optional[str] = None,
     turn_detection: Optional[TurnDetectionConfig] = None,
+    interruption: Optional[InterruptionConfig] = None,
     sal: Optional[SalConfig] = None,
     advanced_features: Optional[Dict[str, Any]] = None,
     parameters: Optional[SessionParams] = None,
@@ -34,8 +35,9 @@ Agent(
 | `name` | `Optional[str]` | `None` | Agent name, used as default session name |
 | `instructions` | `Optional[str]` | `None` | System prompt for the LLM |
 | `turn_detection` | `Optional[TurnDetectionConfig]` | `None` | Turn detection configuration |
+| `interruption` | `Optional[InterruptionConfig]` | `None` | Unified interruption control configuration |
 | `sal` | `Optional[SalConfig]` | `None` | Speech Activity Level configuration |
-| `advanced_features` | `Optional[Dict[str, Any]]` | `None` | Advanced features dict (e.g., `{'enable_mllm': True}`) |
+| `advanced_features` | `Optional[Dict[str, Any]]` | `None` | Advanced features dict (e.g., `{'enable_rtm': True}`) |
 | `parameters` | `Optional[SessionParams]` | `None` | Additional session parameters |
 | `greeting` | `Optional[str]` | `None` | Auto-spoken greeting when agent joins |
 | `failure_message` | `Optional[str]` | `None` | Spoken on error |
@@ -81,13 +83,12 @@ agent = Agent().with_stt(DeepgramSTT(api_key='your-key', language='en-US'))
 
 ### `with_mllm(vendor: BaseMLLM) -> Agent`
 
-Set the MLLM vendor for multimodal flow. Requires `AdvancedFeatures(enable_mllm=True)`.
+Set the MLLM vendor for multimodal flow. Calling `with_mllm()` automatically sets `mllm.enable = True`.
 
 <!-- snippet: fragment -->
 ```python
-from agora_agent.agentkit import AdvancedFeatures
 from agora_agent.agentkit.vendors import OpenAIRealtime
-agent = Agent(advanced_features=AdvancedFeatures(enable_mllm=True)).with_mllm(OpenAIRealtime(api_key='your-key'))
+agent = Agent().with_mllm(OpenAIRealtime(api_key='your-key'))
 ```
 
 ### `with_avatar(vendor: BaseAvatar) -> Agent`
@@ -104,7 +105,11 @@ agent = agent.with_avatar(HeyGenAvatar(api_key='your-key', quality='medium', ago
 
 ### `with_turn_detection(config: TurnDetectionConfig) -> Agent`
 
-Override turn detection settings. Use `config.start_of_speech` and `config.end_of_speech` for the preferred SOS/EOS model.
+Override cascading-flow turn detection settings. Use `config.start_of_speech` and `config.end_of_speech` for SOS/EOS detection. To configure interruption behavior, use `with_interruption()`; to configure MLLM turn detection, set `turn_detection` on the MLLM vendor.
+
+### `with_interruption(config: InterruptionConfig) -> Agent`
+
+Configure unified interruption behavior using the top-level `interruption` object. Use this for `start_of_speech` and `keywords` interruption modes.
 
 ### `with_instructions(instructions: str) -> Agent`
 
@@ -124,7 +129,11 @@ Set SAL (Selective Attention Locking) configuration.
 
 ### `with_advanced_features(features: AdvancedFeatures) -> Agent`
 
-Set advanced features (e.g. `{'enable_mllm': True}`, `{'enable_rtm': True}`).
+Set advanced features (e.g. `{'enable_rtm': True}`).
+
+### `with_tools(enabled: bool = True) -> Agent`
+
+Enable or disable MCP tool invocation by setting `advanced_features.enable_tools`.
 
 ### `with_parameters(parameters: SessionParams) -> Agent`
 
diff --git a/docs/reference/vendors.md b/docs/reference/vendors.md
index af596eb..798a7f3 100644
--- a/docs/reference/vendors.md
+++ b/docs/reference/vendors.md
@@ -10,7 +10,7 @@ All vendor classes are available from `agora_agent.agentkit.vendors`:
 
 <!-- snippet: fragment -->
 ```python
-from agora_agent.agentkit.vendors import OpenAI, ElevenLabsTTS, DeepgramSTT, OpenAIRealtime, HeyGenAvatar
+from agora_agent.agentkit.vendors import OpenAI, ElevenLabsTTS, DeepgramTTS, DeepgramSTT, OpenAIRealtime, GeminiLive, HeyGenAvatar
 ```
 
 ---
@@ -31,7 +31,11 @@ from agora_agent.agentkit.vendors import OpenAI, ElevenLabsTTS, DeepgramSTT, Ope
 | `greeting_message` | `str` | No | `None` | Greeting message |
 | `failure_message` | `str` | No | `None` | Failure message |
 | `input_modalities` | `List[str]` | No | `None` | Input modalities |
+| `output_modalities` | `List[str]` | No | `None` | Output modalities |
 | `params` | `Dict[str, Any]` | No | `None` | Additional model parameters |
+| `headers` | `Dict[str, str]` | No | `None` | Custom HTTP headers forwarded to the LLM provider |
+| `greeting_configs` | `Dict[str, Any]` | No | `None` | Greeting playback configuration |
+| `template_variables` | `Dict[str, str]` | No | `None` | Template variables for messages |
 
 <!-- snippet: fragment -->
 ```python
@@ -55,6 +59,11 @@ llm = OpenAI(api_key='your-key', model='gpt-4o-mini', temperature=0.7)
 | `greeting_message` | `str` | No | `None` | Greeting message |
 | `failure_message` | `str` | No | `None` | Failure message |
 | `input_modalities` | `List[str]` | No | `None` | Input modalities |
+| `output_modalities` | `List[str]` | No | `None` | Output modalities |
+| `params` | `Dict[str, Any]` | No | `None` | Additional model parameters |
+| `headers` | `Dict[str, str]` | No | `None` | Custom HTTP headers forwarded to the LLM provider |
+| `greeting_configs` | `Dict[str, Any]` | No | `None` | Greeting playback configuration |
+| `template_variables` | `Dict[str, str]` | No | `None` | Template variables for messages |
 
 <!-- snippet: fragment -->
 ```python
@@ -80,6 +89,11 @@ llm = AzureOpenAI(
 | `greeting_message` | `str` | No | `None` | Greeting message |
 | `failure_message` | `str` | No | `None` | Failure message |
 | `input_modalities` | `List[str]` | No | `None` | Input modalities |
+| `output_modalities` | `List[str]` | No | `None` | Output modalities |
+| `params` | `Dict[str, Any]` | No | `None` | Additional model parameters |
+| `headers` | `Dict[str, str]` | No | `None` | Custom HTTP headers forwarded to the LLM provider |
+| `greeting_configs` | `Dict[str, Any]` | No | `None` | Greeting playback configuration |
+| `template_variables` | `Dict[str, str]` | No | `None` | Template variables for messages |
 
 <!-- snippet: fragment -->
 ```python
@@ -102,6 +116,11 @@ llm = Anthropic(api_key='your-anthropic-key', model='claude-3-5-sonnet-20241022'
 | `greeting_message` | `str` | No | `None` | Greeting message |
 | `failure_message` | `str` | No | `None` | Failure message |
 | `input_modalities` | `List[str]` | No | `None` | Input modalities |
+| `output_modalities` | `List[str]` | No | `None` | Output modalities |
+| `params` | `Dict[str, Any]` | No | `None` | Additional model parameters |
+| `headers` | `Dict[str, str]` | No | `None` | Custom HTTP headers forwarded to the LLM provider |
+| `greeting_configs` | `Dict[str, Any]` | No | `None` | Greeting playback configuration |
+| `template_variables` | `Dict[str, str]` | No | `None` | Template variables for messages |
 
 <!-- snippet: fragment -->
 ```python
@@ -182,6 +201,17 @@ Fixed sample rate: 24000 Hz.
 | `voice_id` | `str` | Yes | — | Amazon Polly voice ID |
 | `skip_patterns` | `List[int]` | No | `None` | Skip patterns |
 
+### `DeepgramTTS`
+
+| Parameter | Type | Required | Default | Description |
+|---|---|---|---|---|
+| `api_key` | `str` | Yes | — | Deepgram API key |
+| `model` | `str` | Yes | — | Deepgram TTS model (e.g., `aura-2-thalia-en`) |
+| `base_url` | `str` | No | `None` | WebSocket endpoint; defaults server-side to `wss://api.deepgram.com/v1/speak` |
+| `sample_rate` | `int` | No | `None` | Sample rate in Hz (e.g., `24000`) |
+| `params` | `Dict[str, Any]` | No | `None` | Additional Deepgram TTS parameters |
+| `skip_patterns` | `List[int]` | No | `None` | Skip patterns |
+
 ### `HumeAITTS`
 
 | Parameter | Type | Required | Default | Description |
@@ -313,14 +343,6 @@ Fixed sample rate: 24000 Hz.
 | `language` | `str` | No | `None` | Language code |
 | `additional_params` | `Dict[str, Any]` | No | `None` | Additional parameters |
 
-### `SonioxSTT`
-
-| Parameter | Type | Required | Default | Description |
-|---|---|---|---|---|
-| `api_key` | `str` | Yes | — | Soniox API key |
-| `language` | `str` | Yes | — | Language code (e.g., `en`) |
-| `additional_params` | `Dict[str, Any]` | No | `None` | Additional parameters |
-
 ### `SarvamSTT`
 
 | Parameter | Type | Required | Default | Description |
@@ -348,6 +370,26 @@ Fixed sample rate: 24000 Hz.
 | `output_modalities` | `List[str]` | No | `None` | Output modalities |
 | `messages` | `List[Dict]` | No | `None` | Conversation messages |
 | `params` | `Dict[str, Any]` | No | `None` | Additional parameters |
+| `turn_detection` | `MllmTurnDetectionConfig` | No | `None` | MLLM turn detection configuration; overrides top-level `turn_detection` |
+
+### `GeminiLive`
+
+| Parameter | Type | Required | Default | Description |
+|---|---|---|---|---|
+| `api_key` | `str` | Yes | — | Google Gemini API key |
+| `model` | `str` | Yes | — | Gemini Live model name |
+| `url` | `str` | No | `None` | Custom WebSocket URL |
+| `instructions` | `str` | No | `None` | System instructions |
+| `voice` | `str` | No | `None` | Voice name |
+| `greeting_message` | `str` | No | `None` | Greeting message |
+| `failure_message` | `str` | No | `None` | Message played when the model call fails |
+| `max_history` | `int` | No | `None` | Maximum conversation history length |
+| `predefined_tools` | `List[str]` | No | `None` | Predefined tools (e.g., `["_publish_message"]`) |
+| `input_modalities` | `List[str]` | No | `None` | Input modalities |
+| `output_modalities` | `List[str]` | No | `None` | Output modalities |
+| `messages` | `List[Dict]` | No | `None` | Conversation messages |
+| `additional_params` | `Dict[str, Any]` | No | `None` | Additional parameters |
+| `turn_detection` | `MllmTurnDetectionConfig` | No | `None` | MLLM turn detection configuration; overrides top-level `turn_detection` |
 
 ### `VertexAI`
 
@@ -367,6 +409,7 @@ Fixed sample rate: 24000 Hz.
 | `output_modalities` | `List[str]` | No | `None` | Output modalities |
 | `messages` | `List[Dict]` | No | `None` | Conversation messages |
 | `additional_params` | `Dict[str, Any]` | No | `None` | Additional parameters |
+| `turn_detection` | `MllmTurnDetectionConfig` | No | `None` | MLLM turn detection configuration; overrides top-level `turn_detection` |
 
 ---
 
diff --git a/poetry.lock b/poetry.lock
index 5092d3a..46f7b7b 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -38,13 +38,13 @@ trio = ["trio (>=0.26.1)"]
 
 [[package]]
 name = "certifi"
-version = "2026.2.25"
+version = "2026.4.22"
 description = "Python package for providing Mozilla's CA Bundle."
 optional = false
 python-versions = ">=3.7"
 files = [
-    {file = "certifi-2026.2.25-py3-none-any.whl", hash = "sha256:027692e4402ad994f1c42e52a4997a9763c646b73e4096e4d5d6db8af1d6f0fa"},
-    {file = "certifi-2026.2.25.tar.gz", hash = "sha256:e887ab5cee78ea814d3472169153c2d12cd43b14bd03329a39a9c6e2e80bfba7"},
+    {file = "certifi-2026.4.22-py3-none-any.whl", hash = "sha256:3cb2210c8f88ba2318d29b0388d1023c8492ff72ecdde4ebdaddbb13a31b1c4a"},
+    {file = "certifi-2026.4.22.tar.gz", hash = "sha256:8d455352a37b71bf76a79caa83a3d6c25afee4a385d632127b6afb3963f1c580"},
 ]
 
 [[package]]
@@ -133,17 +133,17 @@ zstd = ["zstandard (>=0.18.0)"]
 
 [[package]]
 name = "idna"
-version = "3.11"
+version = "3.14"
 description = "Internationalized Domain Names in Applications (IDNA)"
 optional = false
 python-versions = ">=3.8"
 files = [
-    {file = "idna-3.11-py3-none-any.whl", hash = "sha256:771a87f49d9defaf64091e6e6fe9c18d4833f140bd19464795bc32d966ca37ea"},
-    {file = "idna-3.11.tar.gz", hash = "sha256:795dafcc9c04ed0c1fb032c2aa73654d8e8c5023a7df64a53f39190ada629902"},
+    {file = "idna-3.14-py3-none-any.whl", hash = "sha256:e677eaf072e290f7b725f9acf0b3a2bd55f9fd6f7c70abe5f0e34823d0accf69"},
+    {file = "idna-3.14.tar.gz", hash = "sha256:466d810d7a2cc1022bea9b037c39728d51ae7dad40d480fc9b7d7ecf98ba8ee3"},
 ]
 
 [package.extras]
-all = ["flake8 (>=7.1.1)", "mypy (>=1.11.2)", "pytest (>=8.3.2)", "ruff (>=0.6.2)"]
+all = ["mypy (>=1.11.2)", "pytest (>=8.3.2)", "ruff (>=0.6.2)"]
 
 [[package]]
 name = "iniconfig"
@@ -222,13 +222,13 @@ files = [
 
 [[package]]
 name = "packaging"
-version = "26.0"
+version = "26.2"
 description = "Core utilities for Python packages"
 optional = false
 python-versions = ">=3.8"
 files = [
-    {file = "packaging-26.0-py3-none-any.whl", hash = "sha256:b36f1fef9334a5588b4166f8bcd26a14e521f2b55e6b9de3aaa80d3ff7a37529"},
-    {file = "packaging-26.0.tar.gz", hash = "sha256:00243ae351a257117b6a241061796684b084ed1c516a08c48a3f7e147a9d80b4"},
+    {file = "packaging-26.2-py3-none-any.whl", hash = "sha256:5fc45236b9446107ff2415ce77c807cee2862cb6fac22b8a73826d0693b0980e"},
+    {file = "packaging-26.2.tar.gz", hash = "sha256:ff452ff5a3e828ce110190feff1178bb1f2ea2281fa2075aadb987c2fb221661"},
 ]
 
 [[package]]
diff --git a/pyproject.toml b/pyproject.toml
index b2149d7..6104af4 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -3,7 +3,7 @@ name = "agora-agent-server-sdk"
 
 [tool.poetry]
 name = "agora-agent-server-sdk"
-version = "1.3.0"
+version = "1.4.0"
 description = ""
 readme = "README.md"
 authors = []
diff --git a/reference.md b/reference.md
index 43c3768..356b016 100644
--- a/reference.md
+++ b/reference.md
@@ -916,6 +916,165 @@ client.agents.interrupt(
 </dl>
 
 
+</dd>
+</dl>
+</details>
+
+## Agent Management
+<details><summary><code>client.agent_management.<a href="src/agora_agent/agent_management/client.py">agent_think</a>(...)</code></summary>
+<dl>
+<dd>
+
+#### 📝 Description
+
+<dl>
+<dd>
+
+<dl>
+<dd>
+
+Send a custom text instruction to the specified conversational AI agent instance.
+
+The instruction is injected into the current conversation pipeline as user input, and the agent processes and responds to it following the standard user input logic.
+
+Use this endpoint for the following scenarios:
+- **Implicit instruction injection**: Inject hidden context or directives into the conversation.
+- **Client-side event triggering**: Notify the agent of client-side events, such as a user clicking a button.
+- **Voice and text collaboration**: Combine text instructions with voice input for richer interaction.
+</dd>
+</dl>
+</dd>
+</dl>
+
+#### 🔌 Usage
+
+<dl>
+<dd>
+
+<dl>
+<dd>
+
+```python
+from agora_agent import Agora
+
+client = Agora(
+    authorization="YOUR_AUTHORIZATION",
+    username="YOUR_USERNAME",
+    password="YOUR_PASSWORD",
+)
+client.agent_management.agent_think(
+    appid="appid",
+    agent_id="agentId",
+    text="The user just clicked the purchase button.",
+    on_listening_action="inject",
+    on_thinking_action="interrupt",
+    on_speaking_action="ignore",
+    interruptable=True,
+    metadata={"publisher": "user123", "model": "deepseek-r1"},
+)
+
+```
+</dd>
+</dl>
+</dd>
+</dl>
+
+#### ⚙️ Parameters
+
+<dl>
+<dd>
+
+<dl>
+<dd>
+
+**appid:** `str` — The App ID of the project.
+    
+</dd>
+</dl>
+
+<dl>
+<dd>
+
+**agent_id:** `str` — The agent instance ID you obtained after successfully calling `join` to start a conversational AI agent.
+    
+</dd>
+</dl>
+
+<dl>
+<dd>
+
+**text:** `str` — The custom instruction text to inject into the current conversation pipeline. The system processes this as user input.
+    
+</dd>
+</dl>
+
+<dl>
+<dd>
+
+**on_listening_action:** `typing.Optional[AgentThinkAgentManagementRequestOnListeningAction]` 
+
+The action to take when the agent is in a listening state:
+- `inject`: Inject the custom text instruction into the current turn without interrupting it.
+- `ignore`: Ignore the request.
+    
+</dd>
+</dl>
+
+<dl>
+<dd>
+
+**on_thinking_action:** `typing.Optional[AgentThinkAgentManagementRequestOnThinkingAction]` 
+
+The action to take when the agent is in a thinking state:
+- `interrupt`: Interrupt the current state and start a new conversation turn.
+- `ignore`: Ignore the request.
+    
+</dd>
+</dl>
+
+<dl>
+<dd>
+
+**on_speaking_action:** `typing.Optional[AgentThinkAgentManagementRequestOnSpeakingAction]` 
+
+The action to take when the agent is in a speaking state:
+- `interrupt`: Interrupt the current state and start a new conversation turn.
+- `ignore`: Ignore the request.
+    
+</dd>
+</dl>
+
+<dl>
+<dd>
+
+**interruptable:** `typing.Optional[bool]` 
+
+Whether user speech can interrupt the injected instruction:
+- `true`: User speech can interrupt the instruction.
+- `false`: User speech cannot interrupt the instruction.
+    
+</dd>
+</dl>
+
+<dl>
+<dd>
+
+**metadata:** `typing.Optional[typing.Dict[str, str]]` — Custom metadata in key-value pair format. Use this field to pass additional business information such as identifiers or model references.
+    
+</dd>
+</dl>
+
+<dl>
+<dd>
+
+**request_options:** `typing.Optional[RequestOptions]` — Request-specific configuration.
+    
+</dd>
+</dl>
+</dd>
+</dl>
+
+
 </dd>
 </dl>
 </details>
diff --git a/scripts/validate_docs.py b/scripts/validate_docs.py
deleted file mode 100644
index 4d4e3d3..0000000
--- a/scripts/validate_docs.py
+++ /dev/null
@@ -1,79 +0,0 @@
-import ast
-import pathlib
-import re
-import sys
-from typing import List, Optional
-
-ROOT = pathlib.Path.cwd()
-BANNED_PATTERNS = [
-    re.compile(r'\{\{\s*owner\s*\}\}'),
-    re.compile(r'\{\{\s*repo\s*\}\}'),
-    re.compile(r'from agora-agent-server-sdk'),
-]
-# `concepts` and `reference` snippets must declare whether they are runnable examples or API fragments.
-CODE_BLOCK_RE = re.compile(
-    r'(?:(<!--\s*snippet:\s*(executable|fragment)\s*-->)[ \t]*\n)?```python\n([\s\S]*?)```'
-)
-
-
-def collect_markdown_files() -> List[pathlib.Path]:
-    return [ROOT / 'README.md', *sorted((ROOT / 'docs').rglob('*.md'))]
-
-
-def is_annotated_section(file: pathlib.Path) -> bool:
-    relative = file.relative_to(ROOT).as_posix()
-    return '/docs/concepts/' in f'/{relative}' or '/docs/reference/' in f'/{relative}'
-
-
-def snippet_mode(code: str, annotation: Optional[str]) -> str:
-    if annotation == 'fragment':
-        return 'fragment'
-    if annotation == 'executable':
-        return 'executable'
-    return 'executable'
-
-
-MARKDOWN_FILES = collect_markdown_files()
-
-failures: List[str] = []
-snippet_count = 0
-fragment_count = 0
-
-for file in MARKDOWN_FILES:
-    content = file.read_text(encoding='utf-8')
-
-    for pattern in BANNED_PATTERNS:
-        if pattern.search(content):
-            failures.append(f"{file.relative_to(ROOT)} contains banned pattern: {pattern.pattern}")
-
-    for match in CODE_BLOCK_RE.finditer(content):
-        annotation = match.group(2)
-        code = match.group(3)
-        if is_annotated_section(file) and not annotation:
-            failures.append(f"{file.relative_to(ROOT)} contains an unannotated python snippet")
-            continue
-
-        mode = snippet_mode(code, annotation)
-        if mode == 'fragment':
-            fragment_count += 1
-            continue
-
-        snippet_count += 1
-        try:
-            ast.parse(code, filename=str(file))
-        except SyntaxError as exc:
-            failures.append(f"{file.relative_to(ROOT)}:{exc.lineno}: {exc.msg}")
-
-if snippet_count == 0:
-    failures.append('No Python code blocks found in README/docs markdown.')
-
-if failures:
-    print('Documentation validation failed:', file=sys.stderr)
-    for failure in failures:
-        print(f'- {failure}', file=sys.stderr)
-    raise SystemExit(1)
-
-print(
-    f'Validated {snippet_count} executable and {fragment_count} fragment Python snippets across '
-    f'{len(MARKDOWN_FILES)} markdown files.'
-)
diff --git a/src/agora_agent/agent_management/__init__.py b/src/agora_agent/agent_management/__init__.py
new file mode 100644
index 0000000..5cde020
--- /dev/null
+++ b/src/agora_agent/agent_management/__init__.py
@@ -0,0 +1,4 @@
+# This file was auto-generated by Fern from our API Definition.
+
+# isort: skip_file
+
diff --git a/src/agora_agent/agent_management/client.py b/src/agora_agent/agent_management/client.py
new file mode 100644
index 0000000..71b3c62
--- /dev/null
+++ b/src/agora_agent/agent_management/client.py
@@ -0,0 +1,256 @@
+# This file was auto-generated by Fern from our API Definition.
+
+import typing
+
+from ..core.client_wrapper import AsyncClientWrapper, SyncClientWrapper
+from ..core.request_options import RequestOptions
+from .raw_client import AsyncRawAgentManagementClient, RawAgentManagementClient
+from .types.agent_think_agent_management_request_on_listening_action import (
+    AgentThinkAgentManagementRequestOnListeningAction,
+)
+from .types.agent_think_agent_management_request_on_speaking_action import (
+    AgentThinkAgentManagementRequestOnSpeakingAction,
+)
+from .types.agent_think_agent_management_request_on_thinking_action import (
+    AgentThinkAgentManagementRequestOnThinkingAction,
+)
+from .types.agent_think_agent_management_response import AgentThinkAgentManagementResponse
+
+# this is used as the default value for optional parameters
+OMIT = typing.cast(typing.Any, ...)
+
+
+class AgentManagementClient:
+    def __init__(self, *, client_wrapper: SyncClientWrapper):
+        self._raw_client = RawAgentManagementClient(client_wrapper=client_wrapper)
+
+    @property
+    def with_raw_response(self) -> RawAgentManagementClient:
+        """
+        Retrieves a raw implementation of this client that returns raw responses.
+
+        Returns
+        -------
+        RawAgentManagementClient
+        """
+        return self._raw_client
+
+    def agent_think(
+        self,
+        appid: str,
+        agent_id: str,
+        *,
+        text: str,
+        on_listening_action: typing.Optional[AgentThinkAgentManagementRequestOnListeningAction] = OMIT,
+        on_thinking_action: typing.Optional[AgentThinkAgentManagementRequestOnThinkingAction] = OMIT,
+        on_speaking_action: typing.Optional[AgentThinkAgentManagementRequestOnSpeakingAction] = OMIT,
+        interruptable: typing.Optional[bool] = OMIT,
+        metadata: typing.Optional[typing.Dict[str, str]] = OMIT,
+        request_options: typing.Optional[RequestOptions] = None,
+    ) -> AgentThinkAgentManagementResponse:
+        """
+        Send a custom text instruction to the specified conversational AI agent instance.
+
+        The instruction is injected into the current conversation pipeline as user input, and the agent processes and responds to it following the standard user input logic.
+
+        Use this endpoint for the following scenarios:
+        - **Implicit instruction injection**: Inject hidden context or directives into the conversation.
+        - **Client-side event triggering**: Notify the agent of client-side events, such as a user clicking a button.
+        - **Voice and text collaboration**: Combine text instructions with voice input for richer interaction.
+
+        Parameters
+        ----------
+        appid : str
+            The App ID of the project.
+
+        agent_id : str
+            The agent instance ID you obtained after successfully calling `join` to start a conversational AI agent.
+
+        text : str
+            The custom instruction text to inject into the current conversation pipeline. The system processes this as user input.
+
+        on_listening_action : typing.Optional[AgentThinkAgentManagementRequestOnListeningAction]
+            The action to take when the agent is in a listening state:
+            - `inject`: Inject the custom text instruction into the current turn without interrupting it.
+            - `ignore`: Ignore the request.
+
+        on_thinking_action : typing.Optional[AgentThinkAgentManagementRequestOnThinkingAction]
+            The action to take when the agent is in a thinking state:
+            - `interrupt`: Interrupt the current state and start a new conversation turn.
+            - `ignore`: Ignore the request.
+
+        on_speaking_action : typing.Optional[AgentThinkAgentManagementRequestOnSpeakingAction]
+            The action to take when the agent is in a speaking state:
+            - `interrupt`: Interrupt the current state and start a new conversation turn.
+            - `ignore`: Ignore the request.
+
+        interruptable : typing.Optional[bool]
+            Whether user speech can interrupt the injected instruction:
+            - `true`: User speech can interrupt the instruction.
+            - `false`: User speech cannot interrupt the instruction.
+
+        metadata : typing.Optional[typing.Dict[str, str]]
+            Custom metadata in key-value pair format. Use this field to pass additional business information such as identifiers or model references.
+
+        request_options : typing.Optional[RequestOptions]
+            Request-specific configuration.
+
+        Returns
+        -------
+        AgentThinkAgentManagementResponse
+            Request was successful. The response body contains the result of the request.
+
+        Examples
+        --------
+        from agora_agent import Agora
+
+        client = Agora(
+            authorization="YOUR_AUTHORIZATION",
+            username="YOUR_USERNAME",
+            password="YOUR_PASSWORD",
+        )
+        client.agent_management.agent_think(
+            appid="appid",
+            agent_id="agentId",
+            text="The user just clicked the purchase button.",
+            on_listening_action="inject",
+            on_thinking_action="interrupt",
+            on_speaking_action="ignore",
+            interruptable=True,
+            metadata={"publisher": "user123", "model": "deepseek-r1"},
+        )
+        """
+        _response = self._raw_client.agent_think(
+            appid,
+            agent_id,
+            text=text,
+            on_listening_action=on_listening_action,
+            on_thinking_action=on_thinking_action,
+            on_speaking_action=on_speaking_action,
+            interruptable=interruptable,
+            metadata=metadata,
+            request_options=request_options,
+        )
+        return _response.data
+
+
+class AsyncAgentManagementClient:
+    def __init__(self, *, client_wrapper: AsyncClientWrapper):
+        self._raw_client = AsyncRawAgentManagementClient(client_wrapper=client_wrapper)
+
+    @property
+    def with_raw_response(self) -> AsyncRawAgentManagementClient:
+        """
+        Retrieves a raw implementation of this client that returns raw responses.
+
+        Returns
+        -------
+        AsyncRawAgentManagementClient
+        """
+        return self._raw_client
+
+    async def agent_think(
+        self,
+        appid: str,
+        agent_id: str,
+        *,
+        text: str,
+        on_listening_action: typing.Optional[AgentThinkAgentManagementRequestOnListeningAction] = OMIT,
+        on_thinking_action: typing.Optional[AgentThinkAgentManagementRequestOnThinkingAction] = OMIT,
+        on_speaking_action: typing.Optional[AgentThinkAgentManagementRequestOnSpeakingAction] = OMIT,
+        interruptable: typing.Optional[bool] = OMIT,
+        metadata: typing.Optional[typing.Dict[str, str]] = OMIT,
+        request_options: typing.Optional[RequestOptions] = None,
+    ) -> AgentThinkAgentManagementResponse:
+        """
+        Send a custom text instruction to the specified conversational AI agent instance.
+
+        The instruction is injected into the current conversation pipeline as user input, and the agent processes and responds to it following the standard user input logic.
+
+        Use this endpoint for the following scenarios:
+        - **Implicit instruction injection**: Inject hidden context or directives into the conversation.
+        - **Client-side event triggering**: Notify the agent of client-side events, such as a user clicking a button.
+        - **Voice and text collaboration**: Combine text instructions with voice input for richer interaction.
+
+        Parameters
+        ----------
+        appid : str
+            The App ID of the project.
+
+        agent_id : str
+            The agent instance ID you obtained after successfully calling `join` to start a conversational AI agent.
+
+        text : str
+            The custom instruction text to inject into the current conversation pipeline. The system processes this as user input.
+
+        on_listening_action : typing.Optional[AgentThinkAgentManagementRequestOnListeningAction]
+            The action to take when the agent is in a listening state:
+            - `inject`: Inject the custom text instruction into the current turn without interrupting it.
+            - `ignore`: Ignore the request.
+
+        on_thinking_action : typing.Optional[AgentThinkAgentManagementRequestOnThinkingAction]
+            The action to take when the agent is in a thinking state:
+            - `interrupt`: Interrupt the current state and start a new conversation turn.
+            - `ignore`: Ignore the request.
+
+        on_speaking_action : typing.Optional[AgentThinkAgentManagementRequestOnSpeakingAction]
+            The action to take when the agent is in a speaking state:
+            - `interrupt`: Interrupt the current state and start a new conversation turn.
+            - `ignore`: Ignore the request.
+
+        interruptable : typing.Optional[bool]
+            Whether user speech can interrupt the injected instruction:
+            - `true`: User speech can interrupt the instruction.
+            - `false`: User speech cannot interrupt the instruction.
+
+        metadata : typing.Optional[typing.Dict[str, str]]
+            Custom metadata in key-value pair format. Use this field to pass additional business information such as identifiers or model references.
+
+        request_options : typing.Optional[RequestOptions]
+            Request-specific configuration.
+
+        Returns
+        -------
+        AgentThinkAgentManagementResponse
+            Request was successful. The response body contains the result of the request.
+
+        Examples
+        --------
+        import asyncio
+
+        from agora_agent import AsyncAgora
+
+        client = AsyncAgora(
+            authorization="YOUR_AUTHORIZATION",
+            username="YOUR_USERNAME",
+            password="YOUR_PASSWORD",
+        )
+
+
+        async def main() -> None:
+            await client.agent_management.agent_think(
+                appid="appid",
+                agent_id="agentId",
+                text="The user just clicked the purchase button.",
+                on_listening_action="inject",
+                on_thinking_action="interrupt",
+                on_speaking_action="ignore",
+                interruptable=True,
+                metadata={"publisher": "user123", "model": "deepseek-r1"},
+            )
+
+
+        asyncio.run(main())
+        """
+        _response = await self._raw_client.agent_think(
+            appid,
+            agent_id,
+            text=text,
+            on_listening_action=on_listening_action,
+            on_thinking_action=on_thinking_action,
+            on_speaking_action=on_speaking_action,
+            interruptable=interruptable,
+            metadata=metadata,
+            request_options=request_options,
+        )
+        return _response.data
diff --git a/src/agora_agent/agent_management/raw_client.py b/src/agora_agent/agent_management/raw_client.py
new file mode 100644
index 0000000..03a0838
--- /dev/null
+++ b/src/agora_agent/agent_management/raw_client.py
@@ -0,0 +1,228 @@
+# This file was auto-generated by Fern from our API Definition.
+
+import typing
+from json.decoder import JSONDecodeError
+
+from ..core.api_error import ApiError
+from ..core.client_wrapper import AsyncClientWrapper, SyncClientWrapper
+from ..core.http_response import AsyncHttpResponse, HttpResponse
+from ..core.jsonable_encoder import jsonable_encoder
+from ..core.request_options import RequestOptions
+from ..core.unchecked_base_model import construct_type
+from .types.agent_think_agent_management_request_on_listening_action import (
+    AgentThinkAgentManagementRequestOnListeningAction,
+)
+from .types.agent_think_agent_management_request_on_speaking_action import (
+    AgentThinkAgentManagementRequestOnSpeakingAction,
+)
+from .types.agent_think_agent_management_request_on_thinking_action import (
+    AgentThinkAgentManagementRequestOnThinkingAction,
+)
+from .types.agent_think_agent_management_response import AgentThinkAgentManagementResponse
+
+# this is used as the default value for optional parameters
+OMIT = typing.cast(typing.Any, ...)
+
+
+class RawAgentManagementClient:
+    def __init__(self, *, client_wrapper: SyncClientWrapper):
+        self._client_wrapper = client_wrapper
+
+    def agent_think(
+        self,
+        appid: str,
+        agent_id: str,
+        *,
+        text: str,
+        on_listening_action: typing.Optional[AgentThinkAgentManagementRequestOnListeningAction] = OMIT,
+        on_thinking_action: typing.Optional[AgentThinkAgentManagementRequestOnThinkingAction] = OMIT,
+        on_speaking_action: typing.Optional[AgentThinkAgentManagementRequestOnSpeakingAction] = OMIT,
+        interruptable: typing.Optional[bool] = OMIT,
+        metadata: typing.Optional[typing.Dict[str, str]] = OMIT,
+        request_options: typing.Optional[RequestOptions] = None,
+    ) -> HttpResponse[AgentThinkAgentManagementResponse]:
+        """
+        Send a custom text instruction to the specified conversational AI agent instance.
+
+        The instruction is injected into the current conversation pipeline as user input, and the agent processes and responds to it following the standard user input logic.
+
+        Use this endpoint for the following scenarios:
+        - **Implicit instruction injection**: Inject hidden context or directives into the conversation.
+        - **Client-side event triggering**: Notify the agent of client-side events, such as a user clicking a button.
+        - **Voice and text collaboration**: Combine text instructions with voice input for richer interaction.
+
+        Parameters
+        ----------
+        appid : str
+            The App ID of the project.
+
+        agent_id : str
+            The agent instance ID you obtained after successfully calling `join` to start a conversational AI agent.
+
+        text : str
+            The custom instruction text to inject into the current conversation pipeline. The system processes this as user input.
+
+        on_listening_action : typing.Optional[AgentThinkAgentManagementRequestOnListeningAction]
+            The action to take when the agent is in a listening state:
+            - `inject`: Inject the custom text instruction into the current turn without interrupting it.
+            - `ignore`: Ignore the request.
+
+        on_thinking_action : typing.Optional[AgentThinkAgentManagementRequestOnThinkingAction]
+            The action to take when the agent is in a thinking state:
+            - `interrupt`: Interrupt the current state and start a new conversation turn.
+            - `ignore`: Ignore the request.
+
+        on_speaking_action : typing.Optional[AgentThinkAgentManagementRequestOnSpeakingAction]
+            The action to take when the agent is in a speaking state:
+            - `interrupt`: Interrupt the current state and start a new conversation turn.
+            - `ignore`: Ignore the request.
+
+        interruptable : typing.Optional[bool]
+            Whether user speech can interrupt the injected instruction:
+            - `true`: User speech can interrupt the instruction.
+            - `false`: User speech cannot interrupt the instruction.
+
+        metadata : typing.Optional[typing.Dict[str, str]]
+            Custom metadata in key-value pair format. Use this field to pass additional business information such as identifiers or model references.
+
+        request_options : typing.Optional[RequestOptions]
+            Request-specific configuration.
+
+        Returns
+        -------
+        HttpResponse[AgentThinkAgentManagementResponse]
+            Request was successful. The response body contains the result of the request.
+        """
+        _response = self._client_wrapper.httpx_client.request(
+            f"v2/projects/{jsonable_encoder(appid)}/agents/{jsonable_encoder(agent_id)}/think",
+            method="POST",
+            json={
+                "text": text,
+                "on_listening_action": on_listening_action,
+                "on_thinking_action": on_thinking_action,
+                "on_speaking_action": on_speaking_action,
+                "interruptable": interruptable,
+                "metadata": metadata,
+            },
+            headers={
+                "content-type": "application/json",
+            },
+            request_options=request_options,
+            omit=OMIT,
+        )
+        try:
+            if 200 <= _response.status_code < 300:
+                _data = typing.cast(
+                    AgentThinkAgentManagementResponse,
+                    construct_type(
+                        type_=AgentThinkAgentManagementResponse,  # type: ignore
+                        object_=_response.json(),
+                    ),
+                )
+                return HttpResponse(response=_response, data=_data)
+            _response_json = _response.json()
+        except JSONDecodeError:
+            raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response.text)
+        raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response_json)
+
+
+class AsyncRawAgentManagementClient:
+    def __init__(self, *, client_wrapper: AsyncClientWrapper):
+        self._client_wrapper = client_wrapper
+
+    async def agent_think(
+        self,
+        appid: str,
+        agent_id: str,
+        *,
+        text: str,
+        on_listening_action: typing.Optional[AgentThinkAgentManagementRequestOnListeningAction] = OMIT,
+        on_thinking_action: typing.Optional[AgentThinkAgentManagementRequestOnThinkingAction] = OMIT,
+        on_speaking_action: typing.Optional[AgentThinkAgentManagementRequestOnSpeakingAction] = OMIT,
+        interruptable: typing.Optional[bool] = OMIT,
+        metadata: typing.Optional[typing.Dict[str, str]] = OMIT,
+        request_options: typing.Optional[RequestOptions] = None,
+    ) -> AsyncHttpResponse[AgentThinkAgentManagementResponse]:
+        """
+        Send a custom text instruction to the specified conversational AI agent instance.
+
+        The instruction is injected into the current conversation pipeline as user input, and the agent processes and responds to it following the standard user input logic.
+
+        Use this endpoint for the following scenarios:
+        - **Implicit instruction injection**: Inject hidden context or directives into the conversation.
+        - **Client-side event triggering**: Notify the agent of client-side events, such as a user clicking a button.
+        - **Voice and text collaboration**: Combine text instructions with voice input for richer interaction.
+
+        Parameters
+        ----------
+        appid : str
+            The App ID of the project.
+
+        agent_id : str
+            The agent instance ID you obtained after successfully calling `join` to start a conversational AI agent.
+
+        text : str
+            The custom instruction text to inject into the current conversation pipeline. The system processes this as user input.
+
+        on_listening_action : typing.Optional[AgentThinkAgentManagementRequestOnListeningAction]
+            The action to take when the agent is in a listening state:
+            - `inject`: Inject the custom text instruction into the current turn without interrupting it.
+            - `ignore`: Ignore the request.
+
+        on_thinking_action : typing.Optional[AgentThinkAgentManagementRequestOnThinkingAction]
+            The action to take when the agent is in a thinking state:
+            - `interrupt`: Interrupt the current state and start a new conversation turn.
+            - `ignore`: Ignore the request.
+
+        on_speaking_action : typing.Optional[AgentThinkAgentManagementRequestOnSpeakingAction]
+            The action to take when the agent is in a speaking state:
+            - `interrupt`: Interrupt the current state and start a new conversation turn.
+            - `ignore`: Ignore the request.
+
+        interruptable : typing.Optional[bool]
+            Whether user speech can interrupt the injected instruction:
+            - `true`: User speech can interrupt the instruction.
+            - `false`: User speech cannot interrupt the instruction.
+
+        metadata : typing.Optional[typing.Dict[str, str]]
+            Custom metadata in key-value pair format. Use this field to pass additional business information such as identifiers or model references.
+
+        request_options : typing.Optional[RequestOptions]
+            Request-specific configuration.
+
+        Returns
+        -------
+        AsyncHttpResponse[AgentThinkAgentManagementResponse]
+            Request was successful. The response body contains the result of the request.
+        """
+        _response = await self._client_wrapper.httpx_client.request(
+            f"v2/projects/{jsonable_encoder(appid)}/agents/{jsonable_encoder(agent_id)}/think",
+            method="POST",
+            json={
+                "text": text,
+                "on_listening_action": on_listening_action,
+                "on_thinking_action": on_thinking_action,
+                "on_speaking_action": on_speaking_action,
+                "interruptable": interruptable,
+                "metadata": metadata,
+            },
+            headers={
+                "content-type": "application/json",
+            },
+            request_options=request_options,
+            omit=OMIT,
+        )
+        try:
+            if 200 <= _response.status_code < 300:
+                _data = typing.cast(
+                    AgentThinkAgentManagementResponse,
+                    construct_type(
+                        type_=AgentThinkAgentManagementResponse,  # type: ignore
+                        object_=_response.json(),
+                    ),
+                )
+                return AsyncHttpResponse(response=_response, data=_data)
+            _response_json = _response.json()
+        except JSONDecodeError:
+            raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response.text)
+        raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response_json)
diff --git a/src/agora_agent/agent_management/types/__init__.py b/src/agora_agent/agent_management/types/__init__.py
new file mode 100644
index 0000000..5cde020
--- /dev/null
+++ b/src/agora_agent/agent_management/types/__init__.py
@@ -0,0 +1,4 @@
+# This file was auto-generated by Fern from our API Definition.
+
+# isort: skip_file
+
diff --git a/src/agora_agent/agent_management/types/agent_think_agent_management_request_on_listening_action.py b/src/agora_agent/agent_management/types/agent_think_agent_management_request_on_listening_action.py
new file mode 100644
index 0000000..117d8cc
--- /dev/null
+++ b/src/agora_agent/agent_management/types/agent_think_agent_management_request_on_listening_action.py
@@ -0,0 +1,5 @@
+# This file was auto-generated by Fern from our API Definition.
+
+import typing
+
+AgentThinkAgentManagementRequestOnListeningAction = typing.Union[typing.Literal["inject", "ignore"], typing.Any]
diff --git a/src/agora_agent/agent_management/types/agent_think_agent_management_request_on_speaking_action.py b/src/agora_agent/agent_management/types/agent_think_agent_management_request_on_speaking_action.py
new file mode 100644
index 0000000..3cfe7a4
--- /dev/null
+++ b/src/agora_agent/agent_management/types/agent_think_agent_management_request_on_speaking_action.py
@@ -0,0 +1,5 @@
+# This file was auto-generated by Fern from our API Definition.
+
+import typing
+
+AgentThinkAgentManagementRequestOnSpeakingAction = typing.Union[typing.Literal["interrupt", "ignore"], typing.Any]
diff --git a/src/agora_agent/agent_management/types/agent_think_agent_management_request_on_thinking_action.py b/src/agora_agent/agent_management/types/agent_think_agent_management_request_on_thinking_action.py
new file mode 100644
index 0000000..996272f
--- /dev/null
+++ b/src/agora_agent/agent_management/types/agent_think_agent_management_request_on_thinking_action.py
@@ -0,0 +1,5 @@
+# This file was auto-generated by Fern from our API Definition.
+
+import typing
+
+AgentThinkAgentManagementRequestOnThinkingAction = typing.Union[typing.Literal["interrupt", "ignore"], typing.Any]
diff --git a/src/agora_agent/agent_management/types/agent_think_agent_management_response.py b/src/agora_agent/agent_management/types/agent_think_agent_management_response.py
new file mode 100644
index 0000000..7e512eb
--- /dev/null
+++ b/src/agora_agent/agent_management/types/agent_think_agent_management_response.py
@@ -0,0 +1,33 @@
+# This file was auto-generated by Fern from our API Definition.
+
+import typing
+
+import pydantic
+from ...core.pydantic_utilities import IS_PYDANTIC_V2
+from ...core.unchecked_base_model import UncheckedBaseModel
+
+
+class AgentThinkAgentManagementResponse(UncheckedBaseModel):
+    agent_id: typing.Optional[str] = pydantic.Field(default=None)
+    """
+    Unique identifier of the agent instance.
+    """
+
+    channel: typing.Optional[str] = pydantic.Field(default=None)
+    """
+    The name of the RTC channel where the agent is located.
+    """
+
+    start_ts: typing.Optional[int] = pydantic.Field(default=None)
+    """
+    Timestamp indicating when the agent was created.
+    """
+
+    if IS_PYDANTIC_V2:
+        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True)  # type: ignore # Pydantic v2
+    else:
+
+        class Config:
+            frozen = True
+            smart_union = True
+            extra = pydantic.Extra.allow
diff --git a/src/agora_agent/agent_management/types/agent_think_request_on_listening_action.py b/src/agora_agent/agent_management/types/agent_think_request_on_listening_action.py
new file mode 100644
index 0000000..54cca4c
--- /dev/null
+++ b/src/agora_agent/agent_management/types/agent_think_request_on_listening_action.py
@@ -0,0 +1,5 @@
+# This file was auto-generated by Fern from our API Definition.
+
+import typing
+
+AgentThinkRequestOnListeningAction = typing.Union[typing.Literal["inject", "ignore"], typing.Any]
diff --git a/src/agora_agent/agent_management/types/agent_think_request_on_speaking_action.py b/src/agora_agent/agent_management/types/agent_think_request_on_speaking_action.py
new file mode 100644
index 0000000..8329197
--- /dev/null
+++ b/src/agora_agent/agent_management/types/agent_think_request_on_speaking_action.py
@@ -0,0 +1,5 @@
+# This file was auto-generated by Fern from our API Definition.
+
+import typing
+
+AgentThinkRequestOnSpeakingAction = typing.Union[typing.Literal["interrupt", "ignore"], typing.Any]
diff --git a/src/agora_agent/agent_management/types/agent_think_request_on_thinking_action.py b/src/agora_agent/agent_management/types/agent_think_request_on_thinking_action.py
new file mode 100644
index 0000000..ee50877
--- /dev/null
+++ b/src/agora_agent/agent_management/types/agent_think_request_on_thinking_action.py
@@ -0,0 +1,5 @@
+# This file was auto-generated by Fern from our API Definition.
+
+import typing
+
+AgentThinkRequestOnThinkingAction = typing.Union[typing.Literal["interrupt", "ignore"], typing.Any]
diff --git a/src/agora_agent/agent_management/types/agent_think_response.py b/src/agora_agent/agent_management/types/agent_think_response.py
new file mode 100644
index 0000000..3a3c646
--- /dev/null
+++ b/src/agora_agent/agent_management/types/agent_think_response.py
@@ -0,0 +1,33 @@
+# This file was auto-generated by Fern from our API Definition.
+
+import typing
+
+import pydantic
+from ...core.pydantic_utilities import IS_PYDANTIC_V2
+from ...core.unchecked_base_model import UncheckedBaseModel
+
+
+class AgentThinkResponse(UncheckedBaseModel):
+    agent_id: typing.Optional[str] = pydantic.Field(default=None)
+    """
+    Unique identifier of the agent instance.
+    """
+
+    channel: typing.Optional[str] = pydantic.Field(default=None)
+    """
+    The name of the RTC channel where the agent is located.
+    """
+
+    start_ts: typing.Optional[int] = pydantic.Field(default=None)
+    """
+    Timestamp indicating when the agent was created.
+    """
+
+    if IS_PYDANTIC_V2:
+        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True)  # type: ignore # Pydantic v2
+    else:
+
+        class Config:
+            frozen = True
+            smart_union = True
+            extra = pydantic.Extra.allow
diff --git a/src/agora_agent/agentkit/__init__.py b/src/agora_agent/agentkit/__init__.py
index ed95b7c..1942bce 100644
--- a/src/agora_agent/agentkit/__init__.py
+++ b/src/agora_agent/agentkit/__init__.py
@@ -27,15 +27,33 @@
     SalMode,
     AdvancedFeatures,
     SessionParams,
+    SessionParamsInput,
     SilenceConfig,
     SilenceAction,
     FarewellConfig,
     ParametersDataChannel,
+    ParametersAudioScenario,
+    InterruptionConfig,
+    InterruptionMode,
+    MllmTurnDetectionConfig,
+    MllmTurnDetectionMode,
     LlmGreetingConfigs,
     LlmGreetingConfigsMode,
     McpServersItem,
 )
 from .agent_session import AgentSession, AgentSessionOptions, AsyncAgentSession
+from ..agent_management.types.agent_think_agent_management_response import (
+    AgentThinkAgentManagementResponse as AgentThinkResponse,
+)
+from ..agent_management.types.agent_think_agent_management_request_on_listening_action import (
+    AgentThinkAgentManagementRequestOnListeningAction as AgentThinkRequestOnListeningAction,
+)
+from ..agent_management.types.agent_think_agent_management_request_on_thinking_action import (
+    AgentThinkAgentManagementRequestOnThinkingAction as AgentThinkRequestOnThinkingAction,
+)
+from ..agent_management.types.agent_think_agent_management_request_on_speaking_action import (
+    AgentThinkAgentManagementRequestOnSpeakingAction as AgentThinkRequestOnSpeakingAction,
+)
 from .avatar_types import (
     is_akool_avatar,
     is_anam_avatar,
@@ -46,6 +64,7 @@
 )
 from .constants import (
     DataChannel,
+    AudioScenario,
     SilenceActionValues,
     SalModeValues,
     GeofenceArea,
@@ -87,6 +106,7 @@
     CartesiaSampleRate,
     CartesiaTTS,
     DeepgramSTT,
+    DeepgramTTS,
     ElevenLabsSampleRate,
     ElevenLabsTTS,
     FishAudioTTS,
@@ -151,12 +171,19 @@
     "AdvancedFeatures",
     # Session parameters types
     "SessionParams",
+    "SessionParamsInput",
     "SilenceConfig",
     "SilenceAction",
     "FarewellConfig",
     "ParametersDataChannel",
+    "ParametersAudioScenario",
+    "InterruptionConfig",
+    "InterruptionMode",
+    "MllmTurnDetectionConfig",
+    "MllmTurnDetectionMode",
     # Type-safe constants
     "DataChannel",
+    "AudioScenario",
     "SilenceActionValues",
     "SalModeValues",
     "GeofenceArea",
@@ -170,6 +197,10 @@
     "AgentSession",
     "AsyncAgentSession",
     "AgentSessionOptions",
+    "AgentThinkResponse",
+    "AgentThinkRequestOnListeningAction",
+    "AgentThinkRequestOnThinkingAction",
+    "AgentThinkRequestOnSpeakingAction",
     "AgentPresets",
     "DeepgramPresetModels",
     "OpenAIPresetModels",
@@ -201,6 +232,7 @@
     "MicrosoftTTS",
     "OpenAITTS",
     "CartesiaTTS",
+    "DeepgramTTS",
     "GoogleTTS",
     "AmazonTTS",
     "HumeAITTS",
diff --git a/src/agora_agent/agentkit/agent.py b/src/agora_agent/agentkit/agent.py
index 1e256fa..70a1bdd 100644
--- a/src/agora_agent/agentkit/agent.py
+++ b/src/agora_agent/agentkit/agent.py
@@ -2,6 +2,7 @@
 
 import time
 import typing
+import typing_extensions
 
 if typing.TYPE_CHECKING:
     from .agent_session import AgentSession, AsyncAgentSession
@@ -29,6 +30,11 @@
 from ..agents.types.start_agents_request_properties_parameters_silence_config_action import StartAgentsRequestPropertiesParametersSilenceConfigAction
 from ..agents.types.start_agents_request_properties_parameters_farewell_config import StartAgentsRequestPropertiesParametersFarewellConfig
 from ..agents.types.start_agents_request_properties_parameters_data_channel import StartAgentsRequestPropertiesParametersDataChannel
+from ..agents.types.start_agents_request_properties_parameters_audio_scenario import StartAgentsRequestPropertiesParametersAudioScenario
+from ..agents.types.start_agents_request_properties_interruption import StartAgentsRequestPropertiesInterruption
+from ..agents.types.start_agents_request_properties_interruption_mode import StartAgentsRequestPropertiesInterruptionMode
+from ..agents.types.start_agents_request_properties_mllm_turn_detection import StartAgentsRequestPropertiesMllmTurnDetection
+from ..agents.types.start_agents_request_properties_mllm_turn_detection_mode import StartAgentsRequestPropertiesMllmTurnDetectionMode
 from ..agents.types.start_agents_request_properties_llm_greeting_configs import StartAgentsRequestPropertiesLlmGreetingConfigs
 from ..agents.types.start_agents_request_properties_llm_greeting_configs_mode import StartAgentsRequestPropertiesLlmGreetingConfigsMode
 from ..agents.types.start_agents_request_properties_llm_mcp_servers_item import StartAgentsRequestPropertiesLlmMcpServersItem
@@ -82,6 +88,20 @@
 SilenceAction = StartAgentsRequestPropertiesParametersSilenceConfigAction
 FarewellConfig = StartAgentsRequestPropertiesParametersFarewellConfig
 ParametersDataChannel = StartAgentsRequestPropertiesParametersDataChannel
+ParametersAudioScenario = StartAgentsRequestPropertiesParametersAudioScenario
+InterruptionConfig = StartAgentsRequestPropertiesInterruption
+InterruptionMode = StartAgentsRequestPropertiesInterruptionMode
+MllmTurnDetectionConfig = StartAgentsRequestPropertiesMllmTurnDetection
+MllmTurnDetectionMode = StartAgentsRequestPropertiesMllmTurnDetectionMode
+
+
+class SessionParamsInput(typing_extensions.TypedDict, total=False):  # plain-dict form of session parameters; total=False makes every key optional
+    silence_config: StartAgentsRequestPropertiesParametersSilenceConfig
+    farewell_config: StartAgentsRequestPropertiesParametersFarewellConfig
+    data_channel: StartAgentsRequestPropertiesParametersDataChannel
+    enable_metrics: bool
+    enable_error_message: bool
+    audio_scenario: ParametersAudioScenario  # to_properties() expands these keys as keyword args into StartAgentsRequestPropertiesParameters
 
 # LLM sub-type aliases
 LlmGreetingConfigs = StartAgentsRequestPropertiesLlmGreetingConfigs
@@ -123,9 +143,10 @@ def __init__(
         name: typing.Optional[str] = None,
         instructions: typing.Optional[str] = None,
         turn_detection: typing.Optional[TurnDetectionConfig] = None,
+        interruption: typing.Optional[InterruptionConfig] = None,
         sal: typing.Optional[SalConfig] = None,
         advanced_features: typing.Optional[AdvancedFeatures] = None,
-        parameters: typing.Optional[SessionParams] = None,
+        parameters: typing.Optional[typing.Union[SessionParams, SessionParamsInput]] = None,
         greeting: typing.Optional[str] = None,
         failure_message: typing.Optional[str] = None,
         max_history: typing.Optional[int] = None,
@@ -147,6 +168,7 @@ def __init__(
         self._avatar: typing.Optional[typing.Dict[str, typing.Any]] = None
         self._avatar_required_sample_rate: typing.Optional[int] = None
         self._turn_detection = turn_detection
+        self._interruption = interruption
         self._sal = sal
         self._advanced_features = advanced_features
         self._parameters = parameters
@@ -174,6 +196,21 @@ def with_stt(self, vendor: BaseSTT) -> "Agent":
     def with_mllm(self, vendor: BaseMLLM) -> "Agent":
+        """Returns a new Agent that uses ``vendor`` as its MLLM and drops the legacy ``enable_mllm`` flag."""
         new_agent = self._clone()
         new_agent._mllm = vendor.to_config()
+        if isinstance(new_agent._mllm, dict):
+            # Copy rather than mutate: to_config() may hand back a dict the vendor still holds.
+            new_agent._mllm = {**new_agent._mllm, "enable": True}
+        features = new_agent._advanced_features
+        if isinstance(features, dict):
+            remaining = {key: value for key, value in features.items() if key != "enable_mllm"}
+            new_agent._advanced_features = typing.cast(AdvancedFeatures, remaining) if remaining else None
+        elif isinstance(features, StartAgentsRequestPropertiesAdvancedFeatures):
+            # Pydantic v2 exposes model_copy(); Pydantic v1 only has copy().
+            copy_model = getattr(features, "model_copy", None) or features.copy
+            cleared = copy_model(update={"enable_mllm": None})
+            others = (cleared.enable_rtm, cleared.enable_sal, cleared.enable_tools)
+            # Keep the model only while some other feature flag is still set.
+            new_agent._advanced_features = None if all(flag is None for flag in others) else cleared
         return new_agent
 
     def with_avatar(self, vendor: BaseAvatar) -> "Agent":
@@ -198,6 +235,12 @@ def with_turn_detection(self, config: TurnDetectionConfig) -> "Agent":
         new_agent._turn_detection = config
         return new_agent
 
+    def with_interruption(self, config: InterruptionConfig) -> "Agent":
+        """Return a copy of this Agent whose unified interruption control is set to ``config``."""
+        clone = self._clone()
+        clone._interruption = config
+        return clone
+
     def with_instructions(self, instructions: str) -> "Agent":
         new_agent = self._clone()
         new_agent._instructions = instructions
@@ -222,13 +265,27 @@ def with_sal(self, config: SalConfig) -> "Agent":
     def with_advanced_features(self, features: AdvancedFeatures) -> "Agent":
         """Returns a new Agent with the specified advanced features configuration.
 
-        Use this to enable MLLM mode (``{"enable_mllm": True}``), RTM, and other features.
+        Use this to enable RTM and other advanced features; MLLM is configured via ``with_mllm``.
         """
         new_agent = self._clone()
         new_agent._advanced_features = features
         return new_agent
 
-    def with_parameters(self, parameters: SessionParams) -> "Agent":
+    def with_tools(self, enabled: bool = True) -> "Agent":
+        """Returns a new Agent with MCP tool invocation enabled or disabled."""
+        new_agent = self._clone()
+        current = new_agent._advanced_features
+        if current is None:
+            new_agent._advanced_features = StartAgentsRequestPropertiesAdvancedFeatures(enable_tools=enabled)
+        elif isinstance(current, dict):
+            new_agent._advanced_features = typing.cast(AdvancedFeatures, {**current, "enable_tools": enabled})
+        else:
+            # Pydantic v2 exposes model_copy(); Pydantic v1 only has copy().
+            copy_model = getattr(current, "model_copy", None) or current.copy
+            new_agent._advanced_features = copy_model(update={"enable_tools": enabled})
+        return new_agent
+
+    def with_parameters(self, parameters: typing.Union[SessionParams, SessionParamsInput]) -> "Agent":
         """Returns a new Agent with the specified session parameters.
 
         Use this to configure silence behaviour, graceful hang-up, data channel, and more.
@@ -309,6 +366,10 @@ def mllm(self) -> typing.Optional[typing.Dict[str, typing.Any]]:
     def turn_detection(self) -> typing.Optional[TurnDetectionConfig]:
         return self._turn_detection
 
+    @property
+    def interruption(self) -> typing.Optional[InterruptionConfig]:
+        return self._interruption  # set via __init__(interruption=...) or with_interruption(); None otherwise
+
     @property
     def instructions(self) -> typing.Optional[str]:
         return self._instructions
@@ -338,7 +399,7 @@ def advanced_features(self) -> typing.Optional[AdvancedFeatures]:
         return self._advanced_features
 
     @property
-    def parameters(self) -> typing.Optional[SessionParams]:
+    def parameters(self) -> typing.Optional[typing.Union[SessionParams, SessionParamsInput]]:
         return self._parameters
 
     @property
@@ -370,6 +431,7 @@ def config(self) -> typing.Dict[str, typing.Any]:
             "stt": self._stt,
             "mllm": self._mllm,
             "turn_detection": self._turn_detection,
+            "interruption": self._interruption,
             "sal": self._sal,
             "avatar": self._avatar,
             "advanced_features": self._advanced_features,
@@ -491,13 +553,8 @@ def to_properties(
                 **token_kwargs,
             )
 
-        is_mllm_mode = (
-            self._advanced_features is not None
-            and (
-                (isinstance(self._advanced_features, dict) and self._advanced_features.get("enable_mllm") is True)
-                or (isinstance(self._advanced_features, StartAgentsRequestPropertiesAdvancedFeatures) and self._advanced_features.enable_mllm is True)
-            )
-        )
+        mllm_flag = isinstance(self._mllm, dict) and self._mllm.get("enable") is True
+        is_mllm_mode = bool(mllm_flag or self._mllm is not None)
 
         base_kwargs: typing.Dict[str, typing.Any] = {
             "channel": channel,
@@ -514,6 +571,8 @@ def to_properties(
             base_kwargs["mllm"] = self._mllm
         if self._turn_detection is not None:
             base_kwargs["turn_detection"] = self._turn_detection
+        if self._interruption is not None:
+            base_kwargs["interruption"] = self._interruption
         if self._sal is not None:
             base_kwargs["sal"] = self._sal
         if self._avatar is not None:
@@ -521,7 +580,10 @@ def to_properties(
         if self._advanced_features is not None:
             base_kwargs["advanced_features"] = self._advanced_features
         if self._parameters is not None:
-            base_kwargs["parameters"] = self._parameters
+            if isinstance(self._parameters, dict):
+                base_kwargs["parameters"] = StartAgentsRequestPropertiesParameters(**self._parameters)
+            else:
+                base_kwargs["parameters"] = self._parameters
         if self._geofence is not None:
             base_kwargs["geofence"] = self._geofence
         if self._labels is not None:
@@ -582,6 +644,7 @@ def _clone(self) -> "Agent":
         new_agent._avatar = self._avatar
         new_agent._avatar_required_sample_rate = self._avatar_required_sample_rate
         new_agent._turn_detection = self._turn_detection
+        new_agent._interruption = self._interruption
         new_agent._sal = self._sal
         new_agent._advanced_features = self._advanced_features
         new_agent._parameters = self._parameters
diff --git a/src/agora_agent/agentkit/agent_session.py b/src/agora_agent/agentkit/agent_session.py
index 7af4cf2..2408659 100644
--- a/src/agora_agent/agentkit/agent_session.py
+++ b/src/agora_agent/agentkit/agent_session.py
@@ -2,6 +2,18 @@
 import warnings
 
 from ..core.api_error import ApiError
+from ..agent_management.types.agent_think_agent_management_request_on_listening_action import (
+    AgentThinkAgentManagementRequestOnListeningAction as AgentThinkRequestOnListeningAction,
+)
+from ..agent_management.types.agent_think_agent_management_request_on_speaking_action import (
+    AgentThinkAgentManagementRequestOnSpeakingAction as AgentThinkRequestOnSpeakingAction,
+)
+from ..agent_management.types.agent_think_agent_management_request_on_thinking_action import (
+    AgentThinkAgentManagementRequestOnThinkingAction as AgentThinkRequestOnThinkingAction,
+)
+from ..agent_management.types.agent_think_agent_management_response import (
+    AgentThinkAgentManagementResponse as AgentThinkResponse,
+)
 from ..agents.types.start_agents_request_properties import StartAgentsRequestProperties
 from .agent import Agent
 from .avatar_types import (
@@ -126,6 +138,11 @@ def raw(self) -> typing.Any:
         """
         return self._client.agents
 
+    @property
+    def raw_agent_management(self) -> typing.Any:
+        """Direct access to the underlying Fern-generated AgentManagement client (e.g. ``agent_think``)."""
+        return self._client.agent_management
+
     # ------------------------------------------------------------------
     # Internal helpers
     # ------------------------------------------------------------------
@@ -205,10 +222,10 @@ def _dump_model(value: typing.Any) -> typing.Any:
         return value
 
     def _is_mllm_mode(self) -> bool:
-        advanced_features = self._agent.advanced_features
-        if isinstance(advanced_features, dict):
-            return advanced_features.get("enable_mllm") is True
-        return bool(getattr(advanced_features, "enable_mllm", False))
+        """Return True when an ``mllm`` config is set and does not explicitly carry ``enable=False``."""
+        mllm = self._agent.mllm
+        enable = mllm.get("enable") if isinstance(mllm, dict) else getattr(mllm, "enable", None)
+        return mllm is not None and enable is not False
 
     def _build_start_properties(self, token_opts: typing.Dict[str, typing.Any]) -> typing.Dict[str, typing.Any]:
         base_properties = self._agent.to_properties(
@@ -457,6 +474,41 @@ def interrupt(self) -> None:
             self._app_id, self._agent_id, request_options=self._request_options()
         )
 
+    def think(
+        self,
+        text: str,
+        *,
+        on_listening_action: typing.Optional[AgentThinkRequestOnListeningAction] = None,
+        on_thinking_action: typing.Optional[AgentThinkRequestOnThinkingAction] = None,
+        on_speaking_action: typing.Optional[AgentThinkRequestOnSpeakingAction] = None,
+        interruptable: typing.Optional[bool] = None,
+        metadata: typing.Optional[typing.Dict[str, str]] = None,
+    ) -> AgentThinkResponse:
+        """Inject a custom text instruction into the current session pipeline.
+
+        Keyword options left as ``None`` are not included in the request.
+
+        Raises:
+            RuntimeError: If the session is not ``running`` or no agent ID is set.
+        """
+        if self._status != "running":
+            raise RuntimeError(f"Cannot think in {self._status} state")
+        if not self._agent_id:
+            raise RuntimeError("No agent ID available")
+
+        optional_fields: typing.Dict[str, typing.Any] = {
+            "on_listening_action": on_listening_action,
+            "on_thinking_action": on_thinking_action,
+            "on_speaking_action": on_speaking_action,
+            "interruptable": interruptable,
+            "metadata": metadata,
+        }
+        supplied = {name: value for name, value in optional_fields.items() if value is not None}
+
+        return self._client.agent_management.agent_think(
+            self._app_id, self._agent_id, request_options=self._request_options(), text=text, **supplied
+        )
+
     def update(self, properties: typing.Any) -> None:
         """Update the agent configuration at runtime.
 
@@ -672,6 +724,41 @@ async def interrupt(self) -> None:
             self._app_id, self._agent_id, request_options=self._request_options()
         )
 
+    async def think(
+        self,
+        text: str,
+        *,
+        on_listening_action: typing.Optional[AgentThinkRequestOnListeningAction] = None,
+        on_thinking_action: typing.Optional[AgentThinkRequestOnThinkingAction] = None,
+        on_speaking_action: typing.Optional[AgentThinkRequestOnSpeakingAction] = None,
+        interruptable: typing.Optional[bool] = None,
+        metadata: typing.Optional[typing.Dict[str, str]] = None,
+    ) -> AgentThinkResponse:
+        """Inject a custom text instruction into the current session pipeline.
+
+        Keyword options left as ``None`` are not included in the request.
+
+        Raises:
+            RuntimeError: If the session is not ``running`` or no agent ID is set.
+        """
+        if self._status != "running":
+            raise RuntimeError(f"Cannot think in {self._status} state")
+        if not self._agent_id:
+            raise RuntimeError("No agent ID available")
+
+        optional_fields: typing.Dict[str, typing.Any] = {
+            "on_listening_action": on_listening_action,
+            "on_thinking_action": on_thinking_action,
+            "on_speaking_action": on_speaking_action,
+            "interruptable": interruptable,
+            "metadata": metadata,
+        }
+        supplied = {name: value for name, value in optional_fields.items() if value is not None}
+
+        return await self._client.agent_management.agent_think(
+            self._app_id, self._agent_id, request_options=self._request_options(), text=text, **supplied
+        )
+
     async def update(self, properties: typing.Any) -> None:
         """Update the agent configuration at runtime.
 
diff --git a/src/agora_agent/agentkit/constants.py b/src/agora_agent/agentkit/constants.py
index eb63ee0..f86e4d3 100644
--- a/src/agora_agent/agentkit/constants.py
+++ b/src/agora_agent/agentkit/constants.py
@@ -8,6 +8,13 @@ class DataChannel:
     RTM = "rtm"
     DATASTREAM = "datastream"
 
+
+class AudioScenario:
+    """Named string constants for audio scenario values."""
+    DEFAULT = "default"
+    CHORUS = "chorus"
+    AISERVER = "aiserver"
+
 
 # Silence action when timeout elapses: "speak" | "think"
 # (Use for parameters.silence_config.action — avoids shadowing SilenceAction type)
diff --git a/src/agora_agent/agentkit/presets.py b/src/agora_agent/agentkit/presets.py
index d73f15c..dcd9680 100644
--- a/src/agora_agent/agentkit/presets.py
+++ b/src/agora_agent/agentkit/presets.py
@@ -169,6 +169,8 @@ def strip_inferred_preset_fields(properties: typing.Dict[str, typing.Any], infer
             if inferred_preset == _MINIMAX_MODEL_TO_PRESET.get(_normalize_model_name(params.get("model")) or ""):
                 params["model"] = None
             params["key"] = None
+            params["group_id"] = None
+            params["url"] = None
         tts = {k: v for k, v in {**tts, "params": _omit_none(params)}.items() if v is not None}
 
     return {**properties, "asr": asr, "llm": llm, "tts": tts}
diff --git a/src/agora_agent/agentkit/vendors/__init__.py b/src/agora_agent/agentkit/vendors/__init__.py
index 589c979..0320843 100644
--- a/src/agora_agent/agentkit/vendors/__init__.py
+++ b/src/agora_agent/agentkit/vendors/__init__.py
@@ -28,6 +28,7 @@
 from .tts import (
     AmazonTTS,
     CartesiaTTS,
+    DeepgramTTS,
     ElevenLabsTTS,
     FishAudioTTS,
     GoogleTTS,
@@ -60,6 +61,7 @@
     "MicrosoftTTS",
     "OpenAITTS",
     "CartesiaTTS",
+    "DeepgramTTS",
     "GoogleTTS",
     "AmazonTTS",
     "HumeAITTS",
diff --git a/src/agora_agent/agentkit/vendors/avatar.py b/src/agora_agent/agentkit/vendors/avatar.py
index 74f85ad..b83a356 100644
--- a/src/agora_agent/agentkit/vendors/avatar.py
+++ b/src/agora_agent/agentkit/vendors/avatar.py
@@ -1,3 +1,4 @@
+import warnings
 from typing import Any, Dict, Optional
 
 from pydantic import BaseModel, ConfigDict, Field, field_validator
@@ -31,7 +32,14 @@ def validate_quality(cls, v: str) -> str:
         return v
 
 class HeyGenAvatar(BaseAvatar):
+    """Deprecated: HeyGen has been renamed to LiveAvatar. Use LiveAvatarAvatar instead."""
+
     def __init__(self, **kwargs: Any):
+        warnings.warn(
+            "HeyGenAvatar is deprecated; use LiveAvatarAvatar instead.",
+            DeprecationWarning,
+            stacklevel=2,
+        )
         self.options = HeyGenAvatarOptions(**kwargs)
 
     @property
diff --git a/src/agora_agent/agentkit/vendors/base.py b/src/agora_agent/agentkit/vendors/base.py
index 7b8a5ca..f4c4ce0 100644
--- a/src/agora_agent/agentkit/vendors/base.py
+++ b/src/agora_agent/agentkit/vendors/base.py
@@ -65,8 +65,8 @@ class BaseMLLM(ABC):
     """Abstract base class for all MLLM (multimodal LLM) vendor implementations.
 
     When an MLLM is configured via :meth:`~agora_agent.agentkit.Agent.with_mllm`,
-    the ``enable_mllm`` flag is set on the request and the ``llm``/``tts`` fields
-    are omitted.  Subclasses must implement :meth:`to_config` to return a dict
+    the ``mllm.enable`` flag is set on the request and the ``llm``/``tts`` fields
+    are omitted. Subclasses must implement :meth:`to_config` to return a dict
     that maps to the ``mllm`` field of the payload.
     """
 
diff --git a/src/agora_agent/agentkit/vendors/llm.py b/src/agora_agent/agentkit/vendors/llm.py
index 34c4ce2..7465c9f 100644
--- a/src/agora_agent/agentkit/vendors/llm.py
+++ b/src/agora_agent/agentkit/vendors/llm.py
@@ -29,6 +29,7 @@ class OpenAIOptions(BaseModel):
     failure_message: Optional[str] = Field(default=None)
     input_modalities: Optional[List[str]] = Field(default=None)
     params: Optional[Dict[str, Any]] = Field(default=None)
+    headers: Optional[Dict[str, str]] = Field(default=None)
     output_modalities: Optional[List[str]] = Field(default=None)
     greeting_configs: Optional[Dict[str, Any]] = Field(default=None)
     template_variables: Optional[Dict[str, str]] = Field(default=None)
@@ -61,6 +62,8 @@ def to_config(self) -> Dict[str, Any]:
         }
         if self.options.api_key is not None:
             config["api_key"] = self.options.api_key
+        if self.options.headers is not None:
+            config["headers"] = self.options.headers
 
         if self.options.system_messages is not None:
             config["system_messages"] = self.options.system_messages
@@ -99,6 +102,7 @@ class AzureOpenAIOptions(BaseModel):
     failure_message: Optional[str] = Field(default=None)
     input_modalities: Optional[List[str]] = Field(default=None)
     params: Optional[Dict[str, Any]] = Field(default=None)
+    headers: Optional[Dict[str, str]] = Field(default=None)
     output_modalities: Optional[List[str]] = Field(default=None)
     greeting_configs: Optional[Dict[str, Any]] = Field(default=None)
     template_variables: Optional[Dict[str, str]] = Field(default=None)
@@ -134,6 +138,8 @@ def to_config(self) -> Dict[str, Any]:
             params["max_tokens"] = self.options.max_tokens
         if params:
             config["params"] = params
+        if self.options.headers is not None:
+            config["headers"] = self.options.headers
 
         if self.options.system_messages is not None:
             config["system_messages"] = self.options.system_messages
@@ -169,6 +175,7 @@ class AnthropicOptions(BaseModel):
     failure_message: Optional[str] = Field(default=None)
     input_modalities: Optional[List[str]] = Field(default=None)
     params: Optional[Dict[str, Any]] = Field(default=None)
+    headers: Optional[Dict[str, str]] = Field(default=None)
     output_modalities: Optional[List[str]] = Field(default=None)
     greeting_configs: Optional[Dict[str, Any]] = Field(default=None)
     template_variables: Optional[Dict[str, str]] = Field(default=None)
@@ -200,6 +207,8 @@ def to_config(self) -> Dict[str, Any]:
 
         if self.options.system_messages is not None:
             config["system_messages"] = self.options.system_messages
+        if self.options.headers is not None:
+            config["headers"] = self.options.headers
         if self.options.greeting_message is not None:
             config["greeting_message"] = self.options.greeting_message
         if self.options.failure_message is not None:
@@ -235,6 +244,7 @@ class GeminiOptions(BaseModel):
     failure_message: Optional[str] = Field(default=None)
     input_modalities: Optional[List[str]] = Field(default=None)
     params: Optional[Dict[str, Any]] = Field(default=None)
+    headers: Optional[Dict[str, str]] = Field(default=None)
     output_modalities: Optional[List[str]] = Field(default=None)
     greeting_configs: Optional[Dict[str, Any]] = Field(default=None)
     template_variables: Optional[Dict[str, str]] = Field(default=None)
@@ -268,6 +278,8 @@ def to_config(self) -> Dict[str, Any]:
 
         if self.options.system_messages is not None:
             config["system_messages"] = self.options.system_messages
+        if self.options.headers is not None:
+            config["headers"] = self.options.headers
         if self.options.greeting_message is not None:
             config["greeting_message"] = self.options.greeting_message
         if self.options.failure_message is not None:
diff --git a/src/agora_agent/agentkit/vendors/mllm.py b/src/agora_agent/agentkit/vendors/mllm.py
index 8deb5df..5f6f940 100644
--- a/src/agora_agent/agentkit/vendors/mllm.py
+++ b/src/agora_agent/agentkit/vendors/mllm.py
@@ -2,8 +2,13 @@
 
 from pydantic import BaseModel, ConfigDict, Field
 
+from ...agents.types.start_agents_request_properties_mllm_turn_detection import (
+    StartAgentsRequestPropertiesMllmTurnDetection,
+)
 from .base import BaseMLLM
 
+MllmTurnDetectionConfig = StartAgentsRequestPropertiesMllmTurnDetection
+
 
 class OpenAIRealtimeOptions(BaseModel):
     model_config = ConfigDict(extra="forbid")
@@ -16,6 +21,7 @@ class OpenAIRealtimeOptions(BaseModel):
     output_modalities: Optional[List[str]] = Field(default=None, description="Output modalities")
     messages: Optional[List[Dict[str, Any]]] = Field(default=None, description="Conversation messages")
     params: Optional[Dict[str, Any]] = Field(default=None, description="Additional parameters")
+    turn_detection: Optional[MllmTurnDetectionConfig] = Field(default=None, description="MLLM turn detection configuration")
     predefined_tools: Optional[List[str]] = Field(default=None, description="Predefined tools")
     failure_message: Optional[str] = Field(default=None, description="Message played on failure")
     max_history: Optional[int] = Field(default=None, description="Maximum conversation history length")
@@ -27,7 +33,6 @@ def __init__(self, **kwargs: Any):
     def to_config(self) -> Dict[str, Any]:
         config: Dict[str, Any] = {
             "vendor": "openai",
-            "style": "openai",
             "api_key": self.options.api_key,
         }
 
@@ -54,6 +59,8 @@ def to_config(self) -> Dict[str, Any]:
             config["failure_message"] = self.options.failure_message
         if self.options.max_history is not None:
             config["max_history"] = self.options.max_history
+        if self.options.turn_detection is not None:
+            config["turn_detection"] = self.options.turn_detection
 
         return config
 
@@ -73,6 +80,7 @@ class VertexAIOptions(BaseModel):
     output_modalities: Optional[List[str]] = Field(default=None, description="Output modalities")
     messages: Optional[List[Dict[str, Any]]] = Field(default=None, description="Conversation messages")
     additional_params: Optional[Dict[str, Any]] = Field(default=None, description="Additional parameters")
+    turn_detection: Optional[MllmTurnDetectionConfig] = Field(default=None, description="MLLM turn detection configuration")
     predefined_tools: Optional[List[str]] = Field(default=None, description="Predefined tools")
     failure_message: Optional[str] = Field(default=None, description="Message played on failure")
     max_history: Optional[int] = Field(default=None, description="Maximum conversation history length")
@@ -98,7 +106,6 @@ def to_config(self) -> Dict[str, Any]:
 
         config: Dict[str, Any] = {
             "vendor": "vertexai",
-            "style": "openai",
             "params": params,
         }
 
@@ -118,6 +125,8 @@ def to_config(self) -> Dict[str, Any]:
             config["failure_message"] = self.options.failure_message
         if self.options.max_history is not None:
             config["max_history"] = self.options.max_history
+        if self.options.turn_detection is not None:
+            config["turn_detection"] = self.options.turn_detection
 
         return config
 
@@ -135,6 +144,7 @@ class GeminiLiveOptions(BaseModel):
     output_modalities: Optional[List[str]] = Field(default=None, description="Output modalities")
     messages: Optional[List[Dict[str, Any]]] = Field(default=None, description="Conversation messages")
     additional_params: Optional[Dict[str, Any]] = Field(default=None, description="Additional parameters")
+    turn_detection: Optional[MllmTurnDetectionConfig] = Field(default=None, description="MLLM turn detection configuration")
     predefined_tools: Optional[List[str]] = Field(default=None, description="Predefined tools")
     failure_message: Optional[str] = Field(default=None, description="Message played on failure")
     max_history: Optional[int] = Field(default=None, description="Maximum conversation history length")
@@ -155,7 +165,6 @@ def to_config(self) -> Dict[str, Any]:
 
         config: Dict[str, Any] = {
             "vendor": "gemini",
-            "style": "openai",
             "api_key": self.options.api_key,
             "params": params,
         }
@@ -176,5 +185,7 @@ def to_config(self) -> Dict[str, Any]:
             config["failure_message"] = self.options.failure_message
         if self.options.max_history is not None:
             config["max_history"] = self.options.max_history
+        if self.options.turn_detection is not None:
+            config["turn_detection"] = self.options.turn_detection
 
         return config
diff --git a/src/agora_agent/agentkit/vendors/tts.py b/src/agora_agent/agentkit/vendors/tts.py
index 935479e..557ea56 100644
--- a/src/agora_agent/agentkit/vendors/tts.py
+++ b/src/agora_agent/agentkit/vendors/tts.py
@@ -226,6 +226,42 @@ def to_config(self) -> Dict[str, Any]:
         return result
 
 
+class DeepgramTTSOptions(BaseModel):
+    model_config = ConfigDict(extra="forbid")  # unknown keyword options are rejected
+
+    api_key: str = Field(..., description="Deepgram API key")
+    model: str = Field(..., description="Deepgram TTS model (e.g., 'aura-2-thalia-en')")
+    base_url: Optional[str] = Field(default=None, description="WebSocket endpoint")
+    sample_rate: Optional[int] = Field(default=None, description="Sample rate in Hz")
+    params: Optional[Dict[str, Any]] = Field(default=None, description="Additional Deepgram TTS parameters")
+    skip_patterns: Optional[List[int]] = Field(default=None)  # sent as top-level "skip_patterns", outside "params"
+
+class DeepgramTTS(BaseTTS):
+    """Deepgram TTS vendor configuration, validated through ``DeepgramTTSOptions``."""
+
+    def __init__(self, **kwargs: Any):
+        self.options = DeepgramTTSOptions(**kwargs)
+
+    @property
+    def sample_rate(self) -> Optional[int]:
+        return self.options.sample_rate
+
+    def to_config(self) -> Dict[str, Any]:
+        """Build the ``vendor`` / ``params`` (and optional ``skip_patterns``) TTS payload."""
+        opts = self.options
+        # Later keys win: entries in user ``params`` replace api_key/model, while an
+        # explicit base_url/sample_rate replaces a same-named entry from ``params``.
+        params: Dict[str, Any] = {"api_key": opts.api_key, "model": opts.model, **(opts.params or {})}
+        if opts.base_url is not None:
+            params["base_url"] = opts.base_url
+        if opts.sample_rate is not None:
+            params["sample_rate"] = opts.sample_rate
+        result: Dict[str, Any] = {"vendor": "deepgram", "params": params}
+        if opts.skip_patterns is not None:
+            result["skip_patterns"] = opts.skip_patterns
+        return result
+
+
 class HumeAITTSOptions(BaseModel):
     model_config = ConfigDict(extra="forbid")
 
diff --git a/src/agora_agent/agents/types/start_agents_request_properties.py b/src/agora_agent/agents/types/start_agents_request_properties.py
index e6bb1d1..06c3482 100644
--- a/src/agora_agent/agents/types/start_agents_request_properties.py
+++ b/src/agora_agent/agents/types/start_agents_request_properties.py
@@ -11,6 +11,7 @@
 from .start_agents_request_properties_avatar import StartAgentsRequestPropertiesAvatar
 from .start_agents_request_properties_filler_words import StartAgentsRequestPropertiesFillerWords
 from .start_agents_request_properties_geofence import StartAgentsRequestPropertiesGeofence
+from .start_agents_request_properties_interruption import StartAgentsRequestPropertiesInterruption
 from .start_agents_request_properties_llm import StartAgentsRequestPropertiesLlm
 from .start_agents_request_properties_mllm import StartAgentsRequestPropertiesMllm
 from .start_agents_request_properties_parameters import StartAgentsRequestPropertiesParameters
@@ -36,7 +37,7 @@ class StartAgentsRequestProperties(UncheckedBaseModel):
 
     agent_rtc_uid: str = pydantic.Field()
     """
-    The user ID of the agent in the channel. A value of `0` means that a random UID is generated and assigned. Set the `token` accordingly.
+    The user ID of the agent in the channel. All UIDs within an RTC channel must be unique. Ensure no other user or service bot is using this UID. A value of `0` means that a unique random UID is generated and assigned. Set the `token` accordingly.
     """
 
     remote_rtc_uids: typing.List[str] = pydantic.Field()
@@ -93,7 +94,12 @@ class StartAgentsRequestProperties(UncheckedBaseModel):
 
     turn_detection: typing.Optional[StartAgentsRequestPropertiesTurnDetection] = pydantic.Field(default=None)
     """
-    Conversation turn detection settings. Controls the logic for voice activity detection and conversation turn determination.
+    Conversation turn detection settings. Controls the logic for voice activity detection and conversation turn determination. This object has no effect when `mllm.enable` is true; use `mllm.turn_detection` instead.
+    """
+
+    interruption: typing.Optional[StartAgentsRequestPropertiesInterruption] = pydantic.Field(default=None)
+    """
+    Interruption control configuration. Provides unified management of the agent's behavior when interrupted by the user.
     """
 
     sal: typing.Optional[StartAgentsRequestPropertiesSal] = pydantic.Field(default=None)
diff --git a/src/agora_agent/agents/types/start_agents_request_properties_advanced_features.py b/src/agora_agent/agents/types/start_agents_request_properties_advanced_features.py
index 55480a4..78250d7 100644
--- a/src/agora_agent/agents/types/start_agents_request_properties_advanced_features.py
+++ b/src/agora_agent/agents/types/start_agents_request_properties_advanced_features.py
@@ -14,7 +14,7 @@ class StartAgentsRequestPropertiesAdvancedFeatures(UncheckedBaseModel):
 
     enable_mllm: typing.Optional[bool] = pydantic.Field(default=None)
     """
-    Enable Multimodal Large Language Model for voice-to-voice processing. Enabling MLLM automatically disables ASR, LLM, and TTS since the MLLM handles end-to-end voice processing directly. See `turn_detection.type` for turn detection options available with MLLM.
+    Use `mllm.enable` instead. Enable Multimodal Large Language Model for voice-to-voice processing. Enabling MLLM automatically disables ASR, LLM, and TTS since the MLLM handles end-to-end voice processing directly. See `turn_detection.mode` for turn detection options available with MLLM.
     """
 
     enable_rtm: typing.Optional[bool] = pydantic.Field(default=None)
diff --git a/src/agora_agent/agents/types/start_agents_request_properties_interruption.py b/src/agora_agent/agents/types/start_agents_request_properties_interruption.py
new file mode 100644
index 0000000..4807e56
--- /dev/null
+++ b/src/agora_agent/agents/types/start_agents_request_properties_interruption.py
@@ -0,0 +1,57 @@
+# This file was auto-generated by Fern from our API Definition.
+
+import typing
+
+import pydantic
+from ...core.pydantic_utilities import IS_PYDANTIC_V2
+from ...core.unchecked_base_model import UncheckedBaseModel
+from .start_agents_request_properties_interruption_disabled_config import (
+    StartAgentsRequestPropertiesInterruptionDisabledConfig,
+)
+from .start_agents_request_properties_interruption_keywords_config import (
+    StartAgentsRequestPropertiesInterruptionKeywordsConfig,
+)
+from .start_agents_request_properties_interruption_mode import StartAgentsRequestPropertiesInterruptionMode
+
+
+class StartAgentsRequestPropertiesInterruption(UncheckedBaseModel):
+    """
+    Interruption control configuration. Provides unified management of the agent's behavior when interrupted by the user.
+    """
+
+    enable: typing.Optional[bool] = pydantic.Field(default=None)
+    """
+    Whether to enable agent interruption:
+    - `true`: Enable interruption.
+    - `false`: Disable interruption. When disabled, the agent cannot be interrupted mid-response.
+    """
+
+    mode: typing.Optional[StartAgentsRequestPropertiesInterruptionMode] = pydantic.Field(default=None)
+    """
+    The interruption trigger mode:
+    - `start_of_speech`: Trigger interruption when the user starts speaking.
+    - `keywords`: Trigger interruption when the user speaks a specified keyword. Configure the trigger keywords in `keywords_config`.
+    """
+
+    keywords_config: typing.Optional[StartAgentsRequestPropertiesInterruptionKeywordsConfig] = pydantic.Field(
+        default=None
+    )
+    """
+    Configuration for keyword-based interruption triggering. Applicable only when `mode` is `keywords`.
+    """
+
+    disabled_config: typing.Optional[StartAgentsRequestPropertiesInterruptionDisabledConfig] = pydantic.Field(
+        default=None
+    )
+    """
+    Configuration for agent behavior when interruption is disabled. Applicable only when `interruption.enable` is `false`.
+    """
+
+    if IS_PYDANTIC_V2:
+        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True)  # type: ignore # Pydantic v2
+    else:
+
+        class Config:
+            frozen = True
+            smart_union = True
+            extra = pydantic.Extra.allow
diff --git a/src/agora_agent/agents/types/start_agents_request_properties_interruption_disabled_config.py b/src/agora_agent/agents/types/start_agents_request_properties_interruption_disabled_config.py
new file mode 100644
index 0000000..fb476dd
--- /dev/null
+++ b/src/agora_agent/agents/types/start_agents_request_properties_interruption_disabled_config.py
@@ -0,0 +1,34 @@
+# This file was auto-generated by Fern from our API Definition.
+
+import typing
+
+import pydantic
+from ...core.pydantic_utilities import IS_PYDANTIC_V2
+from ...core.unchecked_base_model import UncheckedBaseModel
+from .start_agents_request_properties_interruption_disabled_config_strategy import (
+    StartAgentsRequestPropertiesInterruptionDisabledConfigStrategy,
+)
+
+
+class StartAgentsRequestPropertiesInterruptionDisabledConfig(UncheckedBaseModel):
+    """
+    Configuration for agent behavior when interruption is disabled. Applicable only when `interruption.enable` is `false`.
+    """
+
+    strategy: typing.Optional[StartAgentsRequestPropertiesInterruptionDisabledConfigStrategy] = pydantic.Field(
+        default=None
+    )
+    """
+    The processing strategy when interruption is disabled:
+    - `append`: User speech does not interrupt the agent. The agent processes the user's input after the current interaction ends.
+    - `ignore`: The agent ignores user speech. If the agent receives user speech while speaking or thinking, it discards the input without storing it in context.
+    """
+
+    if IS_PYDANTIC_V2:
+        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True)  # type: ignore # Pydantic v2
+    else:
+
+        class Config:
+            frozen = True
+            smart_union = True
+            extra = pydantic.Extra.allow
diff --git a/src/agora_agent/agents/types/start_agents_request_properties_interruption_disabled_config_strategy.py b/src/agora_agent/agents/types/start_agents_request_properties_interruption_disabled_config_strategy.py
new file mode 100644
index 0000000..dd4195c
--- /dev/null
+++ b/src/agora_agent/agents/types/start_agents_request_properties_interruption_disabled_config_strategy.py
@@ -0,0 +1,7 @@
+# This file was auto-generated by Fern from our API Definition.
+
+import typing
+
+StartAgentsRequestPropertiesInterruptionDisabledConfigStrategy = typing.Union[
+    typing.Literal["append", "ignore"], typing.Any
+]
diff --git a/src/agora_agent/agents/types/start_agents_request_properties_interruption_keywords_config.py b/src/agora_agent/agents/types/start_agents_request_properties_interruption_keywords_config.py
new file mode 100644
index 0000000..b4c89fc
--- /dev/null
+++ b/src/agora_agent/agents/types/start_agents_request_properties_interruption_keywords_config.py
@@ -0,0 +1,27 @@
+# This file was auto-generated by Fern from our API Definition.
+
+import typing
+
+import pydantic
+from ...core.pydantic_utilities import IS_PYDANTIC_V2
+from ...core.unchecked_base_model import UncheckedBaseModel
+
+
+class StartAgentsRequestPropertiesInterruptionKeywordsConfig(UncheckedBaseModel):
+    """
+    Configuration for keyword-based interruption triggering. Applicable only when `mode` is `keywords`.
+    """
+
+    trigger_keywords: typing.Optional[typing.List[str]] = pydantic.Field(default=None)
+    """
+    The list of keywords that trigger an interruption. A maximum of 128 keywords is supported.
+    """
+
+    if IS_PYDANTIC_V2:
+        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True)  # type: ignore # Pydantic v2
+    else:
+
+        class Config:
+            frozen = True
+            smart_union = True
+            extra = pydantic.Extra.allow
diff --git a/src/agora_agent/agents/types/start_agents_request_properties_interruption_mode.py b/src/agora_agent/agents/types/start_agents_request_properties_interruption_mode.py
new file mode 100644
index 0000000..7f19451
--- /dev/null
+++ b/src/agora_agent/agents/types/start_agents_request_properties_interruption_mode.py
@@ -0,0 +1,5 @@
+# This file was auto-generated by Fern from our API Definition.
+
+import typing
+
+StartAgentsRequestPropertiesInterruptionMode = typing.Union[typing.Literal["start_of_speech", "keywords"], typing.Any]
diff --git a/src/agora_agent/agents/types/start_agents_request_properties_llm.py b/src/agora_agent/agents/types/start_agents_request_properties_llm.py
index 41ced33..20c391e 100644
--- a/src/agora_agent/agents/types/start_agents_request_properties_llm.py
+++ b/src/agora_agent/agents/types/start_agents_request_properties_llm.py
@@ -100,6 +100,11 @@ class StartAgentsRequestPropertiesLlm(UncheckedBaseModel):
     MCP (Model Context Protocol) server configuration. By configuring MCP servers, agents can call tools provided by external services to implement advanced functionality.
     """
 
+    headers: typing.Optional[typing.Dict[str, str]] = pydantic.Field(default=None)
+    """
+    Custom headers to include in requests to the LLM. Use this field to pass business-specific information such as custom fields or tenant identifiers. These headers are merged with the headers generated by the Conversational AI Engine. If a key conflict occurs, the engine-generated header takes precedence.
+    """
+
     if IS_PYDANTIC_V2:
         model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True)  # type: ignore # Pydantic v2
     else:
diff --git a/src/agora_agent/agents/types/start_agents_request_properties_llm_greeting_configs.py b/src/agora_agent/agents/types/start_agents_request_properties_llm_greeting_configs.py
index 46355b7..a8594ee 100644
--- a/src/agora_agent/agents/types/start_agents_request_properties_llm_greeting_configs.py
+++ b/src/agora_agent/agents/types/start_agents_request_properties_llm_greeting_configs.py
@@ -22,6 +22,11 @@ class StartAgentsRequestPropertiesLlmGreetingConfigs(UncheckedBaseModel):
     - `single_first`: Broadcasts a greeting only once to the first user who joins the channel.
     """
 
+    delay_ms: typing.Optional[int] = pydantic.Field(default=None)
+    """
+    The delay in milliseconds before the agent plays the greeting message after a user joins the channel.
+    """
+
     if IS_PYDANTIC_V2:
         model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True)  # type: ignore # Pydantic v2
     else:
diff --git a/src/agora_agent/agents/types/start_agents_request_properties_mllm.py b/src/agora_agent/agents/types/start_agents_request_properties_mllm.py
index d0693f6..e84422c 100644
--- a/src/agora_agent/agents/types/start_agents_request_properties_mllm.py
+++ b/src/agora_agent/agents/types/start_agents_request_properties_mllm.py
@@ -5,6 +5,7 @@
 import pydantic
 from ...core.pydantic_utilities import IS_PYDANTIC_V2
 from ...core.unchecked_base_model import UncheckedBaseModel
+from .start_agents_request_properties_mllm_turn_detection import StartAgentsRequestPropertiesMllmTurnDetection
 from .start_agents_request_properties_mllm_vendor import StartAgentsRequestPropertiesMllmVendor
 
 
@@ -13,6 +14,11 @@ class StartAgentsRequestPropertiesMllm(UncheckedBaseModel):
     Multimodal Large Language Model (MLLM) configuration for real-time audio and text processing. `mllm` is an exclusive alternative to the standard `asr` + `llm` + `tts` pipeline.
     """
 
+    enable: typing.Optional[bool] = pydantic.Field(default=None)
+    """
+    Enable Multimodal Large Language Model for voice-to-voice processing. Enabling MLLM automatically disables ASR, LLM, and TTS since the MLLM handles end-to-end voice processing directly. Replaces the deprecated `advanced_features.enable_mllm`.
+    """
+
     url: typing.Optional[str] = pydantic.Field(default=None)
     """
     The MLLM WebSocket URL for real-time communication.
@@ -30,7 +36,7 @@ class StartAgentsRequestPropertiesMllm(UncheckedBaseModel):
 
     params: typing.Optional[typing.Dict[str, typing.Any]] = pydantic.Field(default=None)
     """
-    Additional MLLM configuration parameters. The `modalities` setting is overridden by `input_modalities` and `output_modalities`. The `turn_detection` setting is overridden by the `turn_detection` section outside of `mllm`.
+    Additional MLLM configuration parameters. The `modalities` setting is overridden by `input_modalities` and `output_modalities`. The `turn_detection` setting is overridden by `mllm.turn_detection`.
     """
 
     input_modalities: typing.Optional[typing.List[str]] = pydantic.Field(default=None)
@@ -51,6 +57,21 @@ class StartAgentsRequestPropertiesMllm(UncheckedBaseModel):
     Agent greeting message. If provided, the first user in the channel is automatically greeted with this message upon joining.
     """
 
+    failure_message: typing.Optional[str] = pydantic.Field(default=None)
+    """
+    Message played when the MLLM call fails.
+    """
+
+    max_history: typing.Optional[int] = pydantic.Field(default=None)
+    """
+    Maximum number of conversation history messages cached for the MLLM session.
+    """
+
+    predefined_tools: typing.Optional[typing.List[str]] = pydantic.Field(default=None)
+    """
+    Predefined tools available to the MLLM provider.
+    """
+
     vendor: typing.Optional[StartAgentsRequestPropertiesMllmVendor] = pydantic.Field(default=None)
     """
     MLLM provider. Currently supports:
@@ -59,10 +80,9 @@ class StartAgentsRequestPropertiesMllm(UncheckedBaseModel):
     - `vertexai`: Google Gemini Live (Vertex AI)
     """
 
-    style: typing.Optional[typing.Literal["openai"]] = pydantic.Field(default=None)
+    turn_detection: typing.Optional[StartAgentsRequestPropertiesMllmTurnDetection] = pydantic.Field(default=None)
     """
-    The request style for MLLM completion:
-    - `openai`: For OpenAI Realtime API format
+    Turn detection configuration for the MLLM module. When defined, the top-level `turn_detection` object has no effect.
     """
 
     if IS_PYDANTIC_V2:
diff --git a/src/agora_agent/agents/types/start_agents_request_properties_mllm_turn_detection.py b/src/agora_agent/agents/types/start_agents_request_properties_mllm_turn_detection.py
new file mode 100644
index 0000000..9298a0c
--- /dev/null
+++ b/src/agora_agent/agents/types/start_agents_request_properties_mllm_turn_detection.py
@@ -0,0 +1,61 @@
+# This file was auto-generated by Fern from our API Definition.
+
+import typing
+
+import pydantic
+from ...core.pydantic_utilities import IS_PYDANTIC_V2
+from ...core.unchecked_base_model import UncheckedBaseModel
+from .start_agents_request_properties_mllm_turn_detection_agora_vad_config import (
+    StartAgentsRequestPropertiesMllmTurnDetectionAgoraVadConfig,
+)
+from .start_agents_request_properties_mllm_turn_detection_mode import StartAgentsRequestPropertiesMllmTurnDetectionMode
+from .start_agents_request_properties_mllm_turn_detection_semantic_vad_config import (
+    StartAgentsRequestPropertiesMllmTurnDetectionSemanticVadConfig,
+)
+from .start_agents_request_properties_mllm_turn_detection_server_vad_config import (
+    StartAgentsRequestPropertiesMllmTurnDetectionServerVadConfig,
+)
+
+
+class StartAgentsRequestPropertiesMllmTurnDetection(UncheckedBaseModel):
+    """
+    Turn detection configuration for the MLLM module. When defined, the top-level `turn_detection` object has no effect.
+    """
+
+    mode: typing.Optional[StartAgentsRequestPropertiesMllmTurnDetectionMode] = pydantic.Field(default=None)
+    """
+    Turn detection mode for MLLM:
+    - `agora_vad`: Agora VAD-based detection.
+    - `server_vad`: Vendor-side VAD-based detection. Supported by OpenAI Realtime API and Gemini Live.
+    - `semantic_vad`: Semantic-based detection. Supported by OpenAI Realtime API only.
+    """
+
+    agora_vad_config: typing.Optional[StartAgentsRequestPropertiesMllmTurnDetectionAgoraVadConfig] = pydantic.Field(
+        default=None
+    )
+    """
+    Configuration for Agora VAD-based turn detection. Applicable when `mode` is `agora_vad`.
+    """
+
+    server_vad_config: typing.Optional[StartAgentsRequestPropertiesMllmTurnDetectionServerVadConfig] = pydantic.Field(
+        default=None
+    )
+    """
+    Configuration for vendor-side VAD-based turn detection. Applicable when `mode` is `server_vad`. Parameters are passed through to the vendor.
+    """
+
+    semantic_vad_config: typing.Optional[StartAgentsRequestPropertiesMllmTurnDetectionSemanticVadConfig] = (
+        pydantic.Field(default=None)
+    )
+    """
+    Configuration for semantic-based turn detection. Applicable when `mode` is `semantic_vad`. Supported by OpenAI Realtime API only.
+    """
+
+    if IS_PYDANTIC_V2:
+        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True)  # type: ignore # Pydantic v2
+    else:  # Pydantic v1: same settings expressed through an inner Config class
+
+        class Config:
+            frozen = True  # instances are immutable after construction
+            smart_union = True  # pydantic v1 option: choose the best-matching Union member rather than the first that coerces
+            extra = pydantic.Extra.allow  # fields not declared above are accepted instead of rejected
diff --git a/src/agora_agent/agents/types/start_agents_request_properties_mllm_turn_detection_agora_vad_config.py b/src/agora_agent/agents/types/start_agents_request_properties_mllm_turn_detection_agora_vad_config.py
new file mode 100644
index 0000000..ec30215
--- /dev/null
+++ b/src/agora_agent/agents/types/start_agents_request_properties_mllm_turn_detection_agora_vad_config.py
@@ -0,0 +1,42 @@
+# This file was auto-generated by Fern from our API Definition.
+
+import typing
+
+import pydantic
+from ...core.pydantic_utilities import IS_PYDANTIC_V2
+from ...core.unchecked_base_model import UncheckedBaseModel
+
+
+class StartAgentsRequestPropertiesMllmTurnDetectionAgoraVadConfig(UncheckedBaseModel):
+    """
+    Configuration for Agora VAD-based turn detection. Applicable when `mode` is `agora_vad`.
+    """
+
+    interrupt_duration_ms: typing.Optional[int] = pydantic.Field(default=None)
+    """
+    Minimum duration of speech in milliseconds required to trigger an interruption.
+    """
+
+    prefix_padding_ms: typing.Optional[int] = pydantic.Field(default=None)
+    """
+    Duration of audio in milliseconds to include before the detected speech start.
+    """
+
+    silence_duration_ms: typing.Optional[int] = pydantic.Field(default=None)
+    """
+    Duration of silence in milliseconds required to determine end of speech.
+    """
+
+    threshold: typing.Optional[float] = pydantic.Field(default=None)
+    """
+    VAD sensitivity threshold. A higher value reduces false positives.
+    """
+
+    if IS_PYDANTIC_V2:
+        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True)  # type: ignore # Pydantic v2
+    else:  # Pydantic v1: same settings expressed through an inner Config class
+
+        class Config:
+            frozen = True  # instances are immutable after construction
+            smart_union = True  # pydantic v1 option: choose the best-matching Union member rather than the first that coerces
+            extra = pydantic.Extra.allow  # fields not declared above are accepted instead of rejected
diff --git a/src/agora_agent/agents/types/start_agents_request_properties_mllm_turn_detection_mode.py b/src/agora_agent/agents/types/start_agents_request_properties_mllm_turn_detection_mode.py
new file mode 100644
index 0000000..0d004e8
--- /dev/null
+++ b/src/agora_agent/agents/types/start_agents_request_properties_mllm_turn_detection_mode.py
@@ -0,0 +1,7 @@
+# This file was auto-generated by Fern from our API Definition.
+
+import typing
+
+StartAgentsRequestPropertiesMllmTurnDetectionMode = typing.Union[  # the three documented modes, plus an open arm
+    typing.Literal["agora_vad", "server_vad", "semantic_vad"], typing.Any  # typing.Any: any other value also type-checks
+]
diff --git a/src/agora_agent/agents/types/start_agents_request_properties_mllm_turn_detection_semantic_vad_config.py b/src/agora_agent/agents/types/start_agents_request_properties_mllm_turn_detection_semantic_vad_config.py
new file mode 100644
index 0000000..1e310f0
--- /dev/null
+++ b/src/agora_agent/agents/types/start_agents_request_properties_mllm_turn_detection_semantic_vad_config.py
@@ -0,0 +1,32 @@
+# This file was auto-generated by Fern from our API Definition.
+
+import typing
+
+import pydantic
+from ...core.pydantic_utilities import IS_PYDANTIC_V2
+from ...core.unchecked_base_model import UncheckedBaseModel
+from .start_agents_request_properties_mllm_turn_detection_semantic_vad_config_eagerness import (
+    StartAgentsRequestPropertiesMllmTurnDetectionSemanticVadConfigEagerness,
+)
+
+
+class StartAgentsRequestPropertiesMllmTurnDetectionSemanticVadConfig(UncheckedBaseModel):
+    """
+    Configuration for semantic-based turn detection. Applicable when `mode` is `semantic_vad`. Supported by OpenAI Realtime API only.
+    """
+
+    eagerness: typing.Optional[StartAgentsRequestPropertiesMllmTurnDetectionSemanticVadConfigEagerness] = (
+        pydantic.Field(default=None)
+    )
+    """
+    Controls how eagerly the model ends its turn.
+    """
+
+    if IS_PYDANTIC_V2:
+        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True)  # type: ignore # Pydantic v2
+    else:  # Pydantic v1: same settings expressed through an inner Config class
+
+        class Config:
+            frozen = True  # instances are immutable after construction
+            smart_union = True  # pydantic v1 option: choose the best-matching Union member rather than the first that coerces
+            extra = pydantic.Extra.allow  # fields not declared above are accepted instead of rejected
diff --git a/src/agora_agent/agents/types/start_agents_request_properties_mllm_turn_detection_semantic_vad_config_eagerness.py b/src/agora_agent/agents/types/start_agents_request_properties_mllm_turn_detection_semantic_vad_config_eagerness.py
new file mode 100644
index 0000000..8b67b1d
--- /dev/null
+++ b/src/agora_agent/agents/types/start_agents_request_properties_mllm_turn_detection_semantic_vad_config_eagerness.py
@@ -0,0 +1,7 @@
+# This file was auto-generated by Fern from our API Definition.
+
+import typing
+
+StartAgentsRequestPropertiesMllmTurnDetectionSemanticVadConfigEagerness = typing.Union[  # four documented levels, plus an open arm
+    typing.Literal["auto", "low", "medium", "high"], typing.Any  # typing.Any: any other value also type-checks
+]
diff --git a/src/agora_agent/agents/types/start_agents_request_properties_mllm_turn_detection_server_vad_config.py b/src/agora_agent/agents/types/start_agents_request_properties_mllm_turn_detection_server_vad_config.py
new file mode 100644
index 0000000..d27b76e
--- /dev/null
+++ b/src/agora_agent/agents/types/start_agents_request_properties_mllm_turn_detection_server_vad_config.py
@@ -0,0 +1,62 @@
+# This file was auto-generated by Fern from our API Definition.
+
+import typing
+
+import pydantic
+from ...core.pydantic_utilities import IS_PYDANTIC_V2
+from ...core.unchecked_base_model import UncheckedBaseModel
+from .start_agents_request_properties_mllm_turn_detection_server_vad_config_end_of_speech_sensitivity import (
+    StartAgentsRequestPropertiesMllmTurnDetectionServerVadConfigEndOfSpeechSensitivity,
+)
+from .start_agents_request_properties_mllm_turn_detection_server_vad_config_start_of_speech_sensitivity import (
+    StartAgentsRequestPropertiesMllmTurnDetectionServerVadConfigStartOfSpeechSensitivity,
+)
+
+
+class StartAgentsRequestPropertiesMllmTurnDetectionServerVadConfig(UncheckedBaseModel):
+    """
+    Configuration for vendor-side VAD-based turn detection. Applicable when `mode` is `server_vad`. Parameters are passed through to the vendor.
+    """
+
+    prefix_padding_ms: typing.Optional[int] = pydantic.Field(default=None)
+    """
+    Duration of audio in milliseconds to include before the detected speech start.
+    """
+
+    silence_duration_ms: typing.Optional[int] = pydantic.Field(default=None)
+    """
+    Duration of silence in milliseconds required to determine end of speech.
+    """
+
+    threshold: typing.Optional[float] = pydantic.Field(default=None)
+    """
+    VAD sensitivity threshold. Applicable to OpenAI Realtime API only.
+    """
+
+    idle_timeout_ms: typing.Optional[int] = pydantic.Field(default=None)
+    """
+    Idle timeout in milliseconds. Applicable to OpenAI Realtime API only.
+    """
+
+    start_of_speech_sensitivity: typing.Optional[
+        StartAgentsRequestPropertiesMllmTurnDetectionServerVadConfigStartOfSpeechSensitivity
+    ] = pydantic.Field(default=None)
+    """
+    Sensitivity for start of speech detection. Applicable to Gemini Live only.
+    """
+
+    end_of_speech_sensitivity: typing.Optional[
+        StartAgentsRequestPropertiesMllmTurnDetectionServerVadConfigEndOfSpeechSensitivity
+    ] = pydantic.Field(default=None)
+    """
+    Sensitivity for end of speech detection. Applicable to Gemini Live only.
+    """
+
+    if IS_PYDANTIC_V2:
+        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True)  # type: ignore # Pydantic v2
+    else:  # Pydantic v1: same settings expressed through an inner Config class
+
+        class Config:
+            frozen = True  # instances are immutable after construction
+            smart_union = True  # pydantic v1 option: choose the best-matching Union member rather than the first that coerces
+            extra = pydantic.Extra.allow  # fields not declared above are accepted instead of rejected
diff --git a/src/agora_agent/agents/types/start_agents_request_properties_mllm_turn_detection_server_vad_config_end_of_speech_sensitivity.py b/src/agora_agent/agents/types/start_agents_request_properties_mllm_turn_detection_server_vad_config_end_of_speech_sensitivity.py
new file mode 100644
index 0000000..e92d3f1
--- /dev/null
+++ b/src/agora_agent/agents/types/start_agents_request_properties_mllm_turn_detection_server_vad_config_end_of_speech_sensitivity.py
@@ -0,0 +1,7 @@
+# This file was auto-generated by Fern from our API Definition.
+
+import typing
+
+StartAgentsRequestPropertiesMllmTurnDetectionServerVadConfigEndOfSpeechSensitivity = typing.Union[  # two documented levels, plus an open arm
+    typing.Literal["END_SENSITIVITY_HIGH", "END_SENSITIVITY_LOW"], typing.Any  # typing.Any: any other value also type-checks
+]
diff --git a/src/agora_agent/agents/types/start_agents_request_properties_mllm_turn_detection_server_vad_config_start_of_speech_sensitivity.py b/src/agora_agent/agents/types/start_agents_request_properties_mllm_turn_detection_server_vad_config_start_of_speech_sensitivity.py
new file mode 100644
index 0000000..25860c1
--- /dev/null
+++ b/src/agora_agent/agents/types/start_agents_request_properties_mllm_turn_detection_server_vad_config_start_of_speech_sensitivity.py
@@ -0,0 +1,7 @@
+# This file was auto-generated by Fern from our API Definition.
+
+import typing
+
+StartAgentsRequestPropertiesMllmTurnDetectionServerVadConfigStartOfSpeechSensitivity = typing.Union[  # two documented levels, plus an open arm
+    typing.Literal["START_SENSITIVITY_HIGH", "START_SENSITIVITY_LOW"], typing.Any  # typing.Any: any other value also type-checks
+]
diff --git a/src/agora_agent/agents/types/start_agents_request_properties_parameters.py b/src/agora_agent/agents/types/start_agents_request_properties_parameters.py
index aee1492..9106f2c 100644
--- a/src/agora_agent/agents/types/start_agents_request_properties_parameters.py
+++ b/src/agora_agent/agents/types/start_agents_request_properties_parameters.py
@@ -5,6 +5,9 @@
 import pydantic
 from ...core.pydantic_utilities import IS_PYDANTIC_V2
 from ...core.unchecked_base_model import UncheckedBaseModel
+from .start_agents_request_properties_parameters_audio_scenario import (
+    StartAgentsRequestPropertiesParametersAudioScenario,
+)
 from .start_agents_request_properties_parameters_data_channel import StartAgentsRequestPropertiesParametersDataChannel
 from .start_agents_request_properties_parameters_farewell_config import (
     StartAgentsRequestPropertiesParametersFarewellConfig,
@@ -48,6 +51,14 @@ class StartAgentsRequestPropertiesParameters(UncheckedBaseModel):
     Whether to receive agent error events. This setting only takes effect when `advanced_features.enable_rtm` is `true`.
     """
 
+    audio_scenario: typing.Optional[StartAgentsRequestPropertiesParametersAudioScenario] = pydantic.Field(default=None)
+    """
+    The audio scenario for the RTC channel.
+    - `default`: Maps to `aiserver`.
+    - `chorus`: Real-time chorus scenario, where users have good network conditions and require ultra-low latency.
+    - `aiserver`: Optimized for interactions between the user and the conversational AI agent in terms of latency and network resilience.
+    """
+
     if IS_PYDANTIC_V2:
         model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True)  # type: ignore # Pydantic v2
     else:
diff --git a/src/agora_agent/agents/types/start_agents_request_properties_parameters_audio_scenario.py b/src/agora_agent/agents/types/start_agents_request_properties_parameters_audio_scenario.py
new file mode 100644
index 0000000..8e14e90
--- /dev/null
+++ b/src/agora_agent/agents/types/start_agents_request_properties_parameters_audio_scenario.py
@@ -0,0 +1,7 @@
+# This file was auto-generated by Fern from our API Definition.
+
+import typing
+
+StartAgentsRequestPropertiesParametersAudioScenario = typing.Union[  # three documented scenarios, plus an open arm
+    typing.Literal["default", "chorus", "aiserver"], typing.Any  # typing.Any: any other value also type-checks
+]
diff --git a/src/agora_agent/agents/types/start_agents_request_properties_turn_detection.py b/src/agora_agent/agents/types/start_agents_request_properties_turn_detection.py
index a768af1..40dbb02 100644
--- a/src/agora_agent/agents/types/start_agents_request_properties_turn_detection.py
+++ b/src/agora_agent/agents/types/start_agents_request_properties_turn_detection.py
@@ -15,7 +15,7 @@
 
 class StartAgentsRequestPropertiesTurnDetection(UncheckedBaseModel):
     """
-    Conversation turn detection settings. Controls the logic for voice activity detection and conversation turn determination.
+    Conversation turn detection settings. Controls the logic for voice activity detection and conversation turn determination. This object has no effect when `mllm.enable` is true; use `mllm.turn_detection` instead.
     """
 
     mode: typing.Optional[typing.Literal["default"]] = pydantic.Field(default=None)
diff --git a/src/agora_agent/agents/types/start_agents_request_properties_turn_detection_config_end_of_speech_semantic_config.py b/src/agora_agent/agents/types/start_agents_request_properties_turn_detection_config_end_of_speech_semantic_config.py
index 3c45e22..71e7722 100644
--- a/src/agora_agent/agents/types/start_agents_request_properties_turn_detection_config_end_of_speech_semantic_config.py
+++ b/src/agora_agent/agents/types/start_agents_request_properties_turn_detection_config_end_of_speech_semantic_config.py
@@ -18,6 +18,13 @@ class StartAgentsRequestPropertiesTurnDetectionConfigEndOfSpeechSemanticConfig(U
     Maximum wait time in milliseconds. Use `-1` for no timeout. The maximum time to wait for semantic determination. After timeout, the conversation end is determined based on the current state.
     """
 
+    pause_state_enabled: typing.Optional[bool] = pydantic.Field(default=None)
+    """
+    Whether to detect user intent to pause the conversation:
+    - `true`: The agent uses semantic understanding to determine if the user intends to pause the conversation. For example, when the user's input ends with phrases such as "hold on" or "just a moment", the agent waits for further input rather than treating the utterance as complete and sending it to the LLM.
+    - `false`: The agent does not detect intent to pause the conversation.
+    """
+
     if IS_PYDANTIC_V2:
         model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True)  # type: ignore # Pydantic v2
     else:
diff --git a/src/agora_agent/agents/types/start_agents_request_properties_turn_detection_config_start_of_speech.py b/src/agora_agent/agents/types/start_agents_request_properties_turn_detection_config_start_of_speech.py
index 6d72018..cfd7a8a 100644
--- a/src/agora_agent/agents/types/start_agents_request_properties_turn_detection_config_start_of_speech.py
+++ b/src/agora_agent/agents/types/start_agents_request_properties_turn_detection_config_start_of_speech.py
@@ -28,8 +28,8 @@ class StartAgentsRequestPropertiesTurnDetectionConfigStartOfSpeech(UncheckedBase
     """
     Start of speech detection mode:
     - `vad`: Based on VAD (Voice Activity Detection). Uses audio signal detection.
-    - `keywords`: (Beta) Based on keyword trigger. Conversation begins when the agent detects a specified keyword.
-    - `disabled`: Disables start of speech detection. Does not actively trigger new conversation turns.
+    - `keywords`: Deprecated. Use `interruption.mode = "keywords"` instead.
+    - `disabled`: Deprecated. Use `interruption.enable = false` with `interruption.disabled_config.strategy` to configure the handling strategy.
     """
 
     vad_config: typing.Optional[StartAgentsRequestPropertiesTurnDetectionConfigStartOfSpeechVadConfig] = pydantic.Field(
diff --git a/src/agora_agent/client.py b/src/agora_agent/client.py
index 3f2290c..8a981af 100644
--- a/src/agora_agent/client.py
+++ b/src/agora_agent/client.py
@@ -9,6 +9,7 @@
 from .environment import AgoraEnvironment
 
 if typing.TYPE_CHECKING:
+    from .agent_management.client import AgentManagementClient, AsyncAgentManagementClient
     from .agents.client import AgentsClient, AsyncAgentsClient
     from .phone_numbers.client import AsyncPhoneNumbersClient, PhoneNumbersClient
     from .telephony.client import AsyncTelephonyClient, TelephonyClient
@@ -88,6 +89,7 @@ def __init__(
             timeout=_defaulted_timeout,
         )
         self._agents: typing.Optional[AgentsClient] = None
+        self._agent_management: typing.Optional[AgentManagementClient] = None
         self._telephony: typing.Optional[TelephonyClient] = None
         self._phone_numbers: typing.Optional[PhoneNumbersClient] = None
 
@@ -99,6 +101,14 @@ def agents(self):
             self._agents = AgentsClient(client_wrapper=self._client_wrapper)
         return self._agents
 
+    @property
+    def agent_management(self):
+        if self._agent_management is None:  # first access: build the sub-client once, later calls reuse it
+            from .agent_management.client import AgentManagementClient  # noqa: E402
+
+            self._agent_management = AgentManagementClient(client_wrapper=self._client_wrapper)
+        return self._agent_management  # AgentManagementClient bound to this client's wrapper
+
     @property
     def telephony(self):
         if self._telephony is None:
@@ -190,6 +200,7 @@ def __init__(
             timeout=_defaulted_timeout,
         )
         self._agents: typing.Optional[AsyncAgentsClient] = None
+        self._agent_management: typing.Optional[AsyncAgentManagementClient] = None
         self._telephony: typing.Optional[AsyncTelephonyClient] = None
         self._phone_numbers: typing.Optional[AsyncPhoneNumbersClient] = None
 
@@ -201,6 +212,14 @@ def agents(self):
             self._agents = AsyncAgentsClient(client_wrapper=self._client_wrapper)
         return self._agents
 
+    @property
+    def agent_management(self):
+        if self._agent_management is None:  # first access: build the sub-client once, later calls reuse it
+            from .agent_management.client import AsyncAgentManagementClient  # noqa: E402
+
+            self._agent_management = AsyncAgentManagementClient(client_wrapper=self._client_wrapper)
+        return self._agent_management  # AsyncAgentManagementClient bound to this client's wrapper
+
     @property
     def telephony(self):
         if self._telephony is None:
diff --git a/src/agora_agent/core/client_wrapper.py b/src/agora_agent/core/client_wrapper.py
index e632923..c5a0e03 100644
--- a/src/agora_agent/core/client_wrapper.py
+++ b/src/agora_agent/core/client_wrapper.py
@@ -26,10 +26,10 @@ def __init__(
 
     def get_headers(self) -> typing.Dict[str, str]:
         headers: typing.Dict[str, str] = {
-            "User-Agent": "agora-agent-server-sdk/1.3.0",
+            "User-Agent": "agora-agent-server-sdk/1.4.0",  # bare version after the slash, same format as the 1.3.0 value it replaces
             "X-Fern-Language": "Python",
             "X-Fern-SDK-Name": "agora-agent-server-sdk",
-            "X-Fern-SDK-Version": "1.3.0",
+            "X-Fern-SDK-Version": "1.4.0",  # no "v" prefix: keeps the header format unchanged from the previous release
             **(self.get_custom_headers() or {}),
         }
         headers["Authorization"] = httpx.BasicAuth(self._get_username(), self._get_password())._auth_header
diff --git a/src/agora_agent/core/pydantic_utilities.py b/src/agora_agent/core/pydantic_utilities.py
index e586456..185e5c4 100644
--- a/src/agora_agent/core/pydantic_utilities.py
+++ b/src/agora_agent/core/pydantic_utilities.py
@@ -2,43 +2,22 @@
 
 # nopycln: file
 import datetime as dt
-import types
-import typing
 from collections import defaultdict
 from typing import Any, Callable, ClassVar, Dict, List, Mapping, Optional, Set, Tuple, Type, TypeVar, Union, cast
 
 import pydantic
-import typing_extensions
-from .datetime_utils import serialize_datetime
-from .serialization import convert_and_respect_annotation_metadata
-from typing_extensions import TypeAlias
 
 IS_PYDANTIC_V2 = pydantic.VERSION.startswith("2.")
 
 if IS_PYDANTIC_V2:
-    UnionType: Any = getattr(types, "UnionType", None)
-    ModelField = Any
-    encoders_by_type = {
-        dt.date: str,
-        dt.datetime: serialize_datetime,
-    }
-    get_args = typing_extensions.get_args
-    get_origin = typing_extensions.get_origin
-
-    _DATE_ADAPTER = pydantic.TypeAdapter(dt.date)  # type: ignore[attr-defined]
-    _DATETIME_ADAPTER = pydantic.TypeAdapter(dt.datetime)  # type: ignore[attr-defined]
-
-    def parse_date(value: Any) -> dt.date:
-        return _DATE_ADAPTER.validate_python(value)
-
-    def parse_datetime(value: Any) -> dt.datetime:
-        return _DATETIME_ADAPTER.validate_python(value)
-
-    def is_literal_type(type_: Any) -> bool:
-        return get_origin(type_) in (typing.Literal, typing_extensions.Literal)
-
-    def is_union(type_: Any) -> bool:
-        return get_origin(type_) in (Union, UnionType)
+    from pydantic.v1.datetime_parse import parse_date as parse_date
+    from pydantic.v1.datetime_parse import parse_datetime as parse_datetime
+    from pydantic.v1.fields import ModelField as ModelField
+    from pydantic.v1.json import ENCODERS_BY_TYPE as encoders_by_type  # type: ignore[attr-defined]
+    from pydantic.v1.typing import get_args as get_args
+    from pydantic.v1.typing import get_origin as get_origin
+    from pydantic.v1.typing import is_literal_type as is_literal_type
+    from pydantic.v1.typing import is_union as is_union
 else:
     from pydantic.datetime_parse import parse_date as parse_date  # type: ignore[no-redef]
     from pydantic.datetime_parse import parse_datetime as parse_datetime  # type: ignore[no-redef]
@@ -49,6 +28,10 @@ def is_union(type_: Any) -> bool:
     from pydantic.typing import is_literal_type as is_literal_type  # type: ignore[no-redef]
     from pydantic.typing import is_union as is_union  # type: ignore[no-redef]
 
+from .datetime_utils import serialize_datetime
+from .serialization import convert_and_respect_annotation_metadata
+from typing_extensions import TypeAlias
+
 T = TypeVar("T")
 Model = TypeVar("Model", bound=pydantic.BaseModel)
 
@@ -210,15 +193,12 @@ class V2RootModel(UniversalBaseModel, pydantic.RootModel):  # type: ignore[misc,
 
 
 def encode_by_type(o: Any) -> Any:
-    encoders_by_class_tuples: Dict[Callable[[Any], Any], Tuple[Type[Any], ...]] = {}
+    encoders_by_class_tuples: Dict[Callable[[Any], Any], Tuple[Any, ...]] = defaultdict(tuple)
     for type_, encoder in encoders_by_type.items():
-        typed_encoder = cast(Callable[[Any], Any], encoder)
-        typed_type = cast(Type[Any], type_)
-        encoders_by_class_tuples[typed_encoder] = encoders_by_class_tuples.get(typed_encoder, ()) + (typed_type,)
+        encoders_by_class_tuples[encoder] += (type_,)
 
     if type(o) in encoders_by_type:
-        encoder = cast(Callable[[Any], Any], encoders_by_type[type(o)])
-        return encoder(o)
+        return encoders_by_type[type(o)](o)
     for encoder, classes_tuple in encoders_by_class_tuples.items():
         if isinstance(o, classes_tuple):
             return encoder(o)
diff --git a/src/agora_agent/types/cartesia_tts_params.py b/src/agora_agent/types/cartesia_tts_params.py
index ed3aa8f..2aaf069 100644
--- a/src/agora_agent/types/cartesia_tts_params.py
+++ b/src/agora_agent/types/cartesia_tts_params.py
@@ -5,6 +5,7 @@
 import pydantic
 from ..core.pydantic_utilities import IS_PYDANTIC_V2
 from ..core.unchecked_base_model import UncheckedBaseModel
+from .cartesia_tts_voice import CartesiaTtsVoice
 
 
 class CartesiaTtsParams(UncheckedBaseModel):
@@ -12,16 +13,12 @@ class CartesiaTtsParams(UncheckedBaseModel):
     Cartesia TTS configuration parameters.
     """
 
-    key: str = pydantic.Field()
+    api_key: str = pydantic.Field()
     """
     Cartesia API key
     """
 
-    voice_id: str = pydantic.Field()
-    """
-    Cartesia voice ID
-    """
-
+    voice: CartesiaTtsVoice
     model_id: typing.Optional[str] = pydantic.Field(default=None)
     """
     Model ID (optional)
diff --git a/src/agora_agent/types/cartesia_tts_voice.py b/src/agora_agent/types/cartesia_tts_voice.py
new file mode 100644
index 0000000..f49ee45
--- /dev/null
+++ b/src/agora_agent/types/cartesia_tts_voice.py
@@ -0,0 +1,32 @@
+# This file was auto-generated by Fern from our API Definition.
+
+import typing
+
+import pydantic
+from ..core.pydantic_utilities import IS_PYDANTIC_V2
+from ..core.unchecked_base_model import UncheckedBaseModel
+
+
+class CartesiaTtsVoice(UncheckedBaseModel):
+    """
+    Cartesia voice selection.
+    """
+
+    mode: typing.Literal["id"] = pydantic.Field(default="id")  # only "id" is declared, and it is also the default
+    """
+    Cartesia voice selection mode.
+    """
+
+    id: str = pydantic.Field()  # required: no default is declared
+    """
+    Cartesia voice ID
+    """
+
+    if IS_PYDANTIC_V2:
+        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True)  # type: ignore # Pydantic v2
+    else:  # Pydantic v1: same settings expressed through an inner Config class
+
+        class Config:
+            frozen = True  # instances are immutable after construction
+            smart_union = True  # pydantic v1 option: choose the best-matching Union member rather than the first that coerces
+            extra = pydantic.Extra.allow  # fields not declared above are accepted instead of rejected
diff --git a/src/agora_agent/types/deepgram_tts.py b/src/agora_agent/types/deepgram_tts.py
new file mode 100644
index 0000000..ce38d9e
--- /dev/null
+++ b/src/agora_agent/types/deepgram_tts.py
@@ -0,0 +1,29 @@
+# This file was auto-generated by Fern from our API Definition.
+
+import typing
+
+import pydantic
+from ..core.pydantic_utilities import IS_PYDANTIC_V2
+from ..core.unchecked_base_model import UncheckedBaseModel
+from .deepgram_tts_params import DeepgramTtsParams
+
+
+class DeepgramTts(UncheckedBaseModel):
+    """
+    Deepgram Text-to-Speech configuration (Beta).
+    """
+
+    params: DeepgramTtsParams  # required: no default is declared
+    skip_patterns: typing.Optional[typing.List[int]] = pydantic.Field(default=None)
+    """
+    Controls whether the TTS module skips bracketed content when reading LLM response text.
+    """
+
+    if IS_PYDANTIC_V2:
+        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True)  # type: ignore # Pydantic v2
+    else:  # Pydantic v1: same settings expressed through an inner Config class
+
+        class Config:
+            frozen = True  # instances are immutable after construction
+            smart_union = True  # pydantic v1 option: choose the best-matching Union member rather than the first that coerces
+            extra = pydantic.Extra.allow  # fields not declared above are accepted instead of rejected
diff --git a/src/agora_agent/types/deepgram_tts_params.py b/src/agora_agent/types/deepgram_tts_params.py
new file mode 100644
index 0000000..e858291
--- /dev/null
+++ b/src/agora_agent/types/deepgram_tts_params.py
@@ -0,0 +1,52 @@
+# This file was auto-generated by Fern from our API Definition.
+
+import typing
+
+import pydantic
+from ..core.pydantic_utilities import IS_PYDANTIC_V2
+from ..core.unchecked_base_model import UncheckedBaseModel
+
+
+class DeepgramTtsParams(UncheckedBaseModel):
+    """
+    Deepgram TTS configuration parameters.
+    """
+
+    api_key: str = pydantic.Field()  # required: no default is declared
+    """
+    Deepgram API key
+    """
+
+    model: str = pydantic.Field()  # required: no default is declared
+    """
+    Deepgram TTS model (for example, "aura-2-thalia-en")
+    """
+
+    base_url: typing.Optional[str] = pydantic.Field(default=None)  # optional; None when not provided
+    """
+    Deepgram WebSocket endpoint override
+    """
+
+    sample_rate: typing.Optional[int] = pydantic.Field(default=None)  # optional; None when not provided
+    """
+    Audio sampling rate in Hz
+    """
+
+    params: typing.Optional[typing.Dict[str, typing.Any]] = pydantic.Field(default=None)  # free-form mapping; values are not type-checked by this model
+    """
+    Additional Deepgram TTS parameters
+    """
+
+    skip_patterns: typing.Optional[typing.List[int]] = pydantic.Field(default=None)  # NOTE(review): same field is also declared on the enclosing DeepgramTts / Tts_Deepgram models - confirm which level the API reads
+    """
+    Controls whether the TTS module skips bracketed content when reading LLM response text.
+    """
+
+    if IS_PYDANTIC_V2:  # pick the configuration style that matches the installed pydantic major version
+        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True)  # type: ignore # Pydantic v2
+    else:  # pydantic v1 style: same settings expressed through an inner Config class
+
+        class Config:
+            frozen = True  # attribute assignment on instances is rejected
+            smart_union = True
+            extra = pydantic.Extra.allow  # undeclared fields are kept instead of raising
diff --git a/src/agora_agent/types/open_ai_tts_params.py b/src/agora_agent/types/open_ai_tts_params.py
index 458292d..67a6e1d 100644
--- a/src/agora_agent/types/open_ai_tts_params.py
+++ b/src/agora_agent/types/open_ai_tts_params.py
@@ -12,7 +12,7 @@ class OpenAiTtsParams(UncheckedBaseModel):
     OpenAI TTS configuration parameters.
     """
 
-    key: str = pydantic.Field()
+    api_key: typing.Optional[str] = pydantic.Field(default=None)
     """
     OpenAI API key
     """
diff --git a/src/agora_agent/types/tts.py b/src/agora_agent/types/tts.py
index 56a36fd..85761fd 100644
--- a/src/agora_agent/types/tts.py
+++ b/src/agora_agent/types/tts.py
@@ -10,6 +10,7 @@
 from ..core.unchecked_base_model import UncheckedBaseModel, UnionMetadata
 from .amazon_tts_params import AmazonTtsParams
 from .cartesia_tts_params import CartesiaTtsParams
+from .deepgram_tts_params import DeepgramTtsParams
 from .eleven_labs_tts_params import ElevenLabsTtsParams
 from .fish_audio_tts_params import FishAudioTtsParams
 from .google_tts_params import GoogleTtsParams
@@ -202,6 +203,21 @@ class Config:
             extra = pydantic.Extra.allow
 
 
+class Tts_Deepgram(UncheckedBaseModel):  # Deepgram member of the Tts union declared below
+    vendor: typing.Literal["deepgram"] = "deepgram"  # the Tts union uses "vendor" as its discriminant
+    params: DeepgramTtsParams  # required: no default is declared
+    skip_patterns: typing.Optional[typing.List[int]] = None  # optional; NOTE(review): element meaning is not visible here - confirm against the API definition
+
+    if IS_PYDANTIC_V2:  # pick the configuration style that matches the installed pydantic major version
+        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True)  # type: ignore # Pydantic v2
+    else:  # pydantic v1 style: same settings expressed through an inner Config class
+
+        class Config:
+            frozen = True  # attribute assignment on instances is rejected
+            smart_union = True
+            extra = pydantic.Extra.allow  # undeclared fields are kept instead of raising
+
+
 Tts = typing_extensions.Annotated[
     typing.Union[
         Tts_Microsoft,
@@ -216,6 +232,7 @@ class Config:
         Tts_Google,
         Tts_Amazon,
         Tts_Sarvam,
+        Tts_Deepgram,
     ],
     UnionMetadata(discriminant="vendor"),
 ]
diff --git a/tests/__init__.py b/tests/__init__.py
deleted file mode 100644
index f452943..0000000
--- a/tests/__init__.py
+++ /dev/null
@@ -1 +0,0 @@
-# This file makes the test suite an explicit package for mypy module resolution.
diff --git a/tests/agentkit/__init__.py b/tests/agentkit/__init__.py
deleted file mode 100644
index 394ea77..0000000
--- a/tests/agentkit/__init__.py
+++ /dev/null
@@ -1 +0,0 @@
-# This file makes the AgentKit test suite an explicit package for mypy module resolution.
diff --git a/tests/agentkit/helpers.py b/tests/agentkit/helpers.py
deleted file mode 100644
index 3936836..0000000
--- a/tests/agentkit/helpers.py
+++ /dev/null
@@ -1,112 +0,0 @@
-from __future__ import annotations
-
-from types import SimpleNamespace
-from typing import Any, Dict, List, Optional
-
-
-def dump_model(value: Any) -> Any:
-    if hasattr(value, "model_dump"):
-        return value.model_dump(exclude_none=True)
-    if isinstance(value, dict):
-        return {k: dump_model(v) for k, v in value.items()}
-    if isinstance(value, list):
-        return [dump_model(v) for v in value]
-    return value
-
-
-class DummyAgents:
-    def __init__(self) -> None:
-        self.start_calls: List[Any] = []
-        self.stop_calls: List[Any] = []
-        self.speak_calls: List[Any] = []
-        self.interrupt_calls: List[Any] = []
-        self.update_calls: List[Any] = []
-        self.history_calls: List[Any] = []
-        self.turn_calls: List[Any] = []
-        self.get_calls: List[Any] = []
-
-        self.start_result: Any = SimpleNamespace(agent_id="agent-1")
-        self.start_error: Optional[Exception] = None
-        self.stop_error: Optional[Exception] = None
-
-    def start(self, app_id, **kwargs):
-        self.start_calls.append((app_id, kwargs))
-        if self.start_error is not None:
-            raise self.start_error
-        return self.start_result
-
-    def stop(self, app_id, agent_id, request_options=None):
-        self.stop_calls.append((app_id, agent_id, request_options))
-        if self.stop_error is not None:
-            raise self.stop_error
-        return None
-
-    def speak(self, app_id, agent_id, request_options=None, **kwargs):
-        self.speak_calls.append((app_id, agent_id, request_options, kwargs))
-        return None
-
-    def interrupt(self, app_id, agent_id, request_options=None):
-        self.interrupt_calls.append((app_id, agent_id, request_options))
-        return None
-
-    def update(self, app_id, agent_id, properties=None, request_options=None):
-        self.update_calls.append((app_id, agent_id, properties, request_options))
-        return None
-
-    def get_history(self, app_id, agent_id, request_options=None):
-        self.history_calls.append((app_id, agent_id, request_options))
-        return {"contents": []}
-
-    def get_turns(self, app_id, agent_id, request_options=None):
-        self.turn_calls.append((app_id, agent_id, request_options))
-        return {"turns": [{"agent_id": agent_id}]}
-
-    def get(self, app_id, agent_id, request_options=None):
-        self.get_calls.append((app_id, agent_id, request_options))
-        return {"agent_id": agent_id}
-
-
-class DummyAsyncAgents(DummyAgents):
-    async def start(self, app_id, **kwargs):
-        return super().start(app_id, **kwargs)
-
-    async def stop(self, app_id, agent_id, request_options=None):
-        return super().stop(app_id, agent_id, request_options)
-
-    async def speak(self, app_id, agent_id, request_options=None, **kwargs):
-        return super().speak(app_id, agent_id, request_options, **kwargs)
-
-    async def interrupt(self, app_id, agent_id, request_options=None):
-        return super().interrupt(app_id, agent_id, request_options)
-
-    async def update(self, app_id, agent_id, properties=None, request_options=None):
-        return super().update(app_id, agent_id, properties, request_options)
-
-    async def get_history(self, app_id, agent_id, request_options=None):
-        return super().get_history(app_id, agent_id, request_options)
-
-    async def get_turns(self, app_id, agent_id, request_options=None):
-        return super().get_turns(app_id, agent_id, request_options)
-
-    async def get(self, app_id, agent_id, request_options=None):
-        return super().get(app_id, agent_id, request_options)
-
-
-class DummyClient:
-    def __init__(
-        self,
-        *,
-        auth_mode: str = "basic",
-        app_id: str = "app-id",
-        app_certificate: Optional[str] = "app-cert",
-    ) -> None:
-        self.app_id = app_id
-        self.app_certificate = app_certificate
-        self.auth_mode = auth_mode
-        self.agents = DummyAgents()
-
-
-class DummyAsyncClient(DummyClient):
-    def __init__(self, **kwargs: Any) -> None:
-        super().__init__(**kwargs)
-        self.agents = DummyAsyncAgents()
diff --git a/tests/agentkit/test_agent.py b/tests/agentkit/test_agent.py
deleted file mode 100644
index 4bd0030..0000000
--- a/tests/agentkit/test_agent.py
+++ /dev/null
@@ -1,130 +0,0 @@
-from unittest import mock
-
-import pytest
-
-from agora_agent.agentkit import Agent
-from agora_agent.agentkit.vendors import (
-    DeepgramSTT,
-    ElevenLabsTTS,
-    OpenAI,
-    OpenAIRealtime,
-)
-from tests.agentkit.helpers import DummyClient, dump_model
-
-
-def test_builder_methods_are_immutable_and_reflected_in_config_and_getters():
-    agent = Agent(name="base", instructions="helpful")
-    llm = OpenAI(api_key="key", model="gpt-4o-mini")
-    tts = ElevenLabsTTS(key="tts", model_id="model", voice_id="voice", sample_rate=24000)
-    stt = DeepgramSTT(api_key="dg", model="nova-3")
-
-    configured = agent.with_llm(llm).with_tts(tts).with_stt(stt).with_greeting("hi").with_max_history(10)
-
-    assert agent.llm is None
-    assert configured.llm == llm.to_config()
-    assert configured.tts == tts.to_config()
-    assert configured.stt == stt.to_config()
-    assert configured.greeting == "hi"
-    assert configured.max_history == 10
-    assert configured.config["name"] == "base"
-
-
-def test_create_session_resolves_name_from_option_agent_or_timestamp():
-    client = DummyClient()
-    named = Agent(name="from-agent")
-    explicit = named.create_session(client, channel="c", agent_uid="1", remote_uids=["2"], name="explicit")
-    assert explicit.agent.name == "from-agent"
-    assert explicit._name == "explicit"
-
-    defaulted = named.create_session(client, channel="c", agent_uid="1", remote_uids=["2"])
-    assert defaulted._name == "from-agent"
-
-    with mock.patch("agora_agent.agentkit.agent.time.time", return_value=123456):
-        generated = Agent().create_session(client, channel="c", agent_uid="1", remote_uids=["2"])
-    assert generated._name == "agent-123456"
-
-
-def test_to_properties_throws_when_llm_or_tts_missing_outside_preset_or_pipeline_flow():
-    with pytest.raises(ValueError, match="TTS configuration is required"):
-        Agent().with_llm(OpenAI(api_key="key", model="gpt-4o-mini")).to_properties(
-            channel="room",
-            agent_uid="1",
-            remote_uids=["2"],
-            token="token",
-        )
-
-    with pytest.raises(ValueError, match="LLM configuration is required"):
-        Agent().with_tts(ElevenLabsTTS(key="tts", model_id="model", voice_id="voice", sample_rate=24000)).to_properties(
-            channel="room",
-            agent_uid="1",
-            remote_uids=["2"],
-            token="token",
-        )
-
-
-def test_to_properties_applies_defaults_and_overrides_for_standard_pipeline():
-    agent = (
-        Agent(instructions="top-level instructions", greeting="hello", failure_message="retry", max_history=7)
-        .with_llm(
-            OpenAI(
-                api_key="key",
-                model="gpt-4o-mini",
-                greeting_message="vendor greeting",
-                failure_message="vendor failure",
-            )
-        )
-        .with_tts(ElevenLabsTTS(key="tts", model_id="model", voice_id="voice", sample_rate=24000))
-        .with_stt(DeepgramSTT(api_key="dg", model="nova-3"))
-    )
-
-    props = dump_model(
-        agent.to_properties(channel="room", agent_uid="1", remote_uids=["2"], token="token")
-    )
-
-    assert props["llm"]["system_messages"] == [{"role": "system", "content": "top-level instructions"}]
-    assert props["llm"]["greeting_message"] == "vendor greeting"
-    assert props["llm"]["failure_message"] == "vendor failure"
-    assert props["llm"]["max_history"] == 7
-    assert props["tts"]["vendor"] == "elevenlabs"
-    assert props["asr"]["vendor"] == "deepgram"
-
-
-def test_to_properties_supports_preset_or_pipeline_backed_sessions_without_llm_or_tts():
-    props = dump_model(
-        Agent(instructions="preset-backed").to_properties(
-            channel="room",
-            agent_uid="1",
-            remote_uids=["2"],
-            token="token",
-            skip_vendor_validation=True,
-        )
-    )
-    assert props["channel"] == "room"
-    assert "llm" not in props
-    assert "tts" not in props
-
-
-def test_to_properties_generates_token_and_respects_mllm_vendor_precedence():
-    agent = Agent(greeting="top hello", failure_message="top fail", max_history=9).with_mllm(
-        OpenAIRealtime(
-            api_key="key",
-            url="wss://openai.example.com/realtime",
-            greeting_message="vendor hello",
-        )
-    ).with_advanced_features({"enable_mllm": True})
-
-    props = dump_model(
-        agent.to_properties(
-            channel="room",
-            agent_uid="1",
-            remote_uids=["2"],
-            app_id="app-id",
-            app_certificate="app-cert",
-        )
-    )
-
-    assert props["mllm"]["greeting_message"] == "vendor hello"
-    assert props["mllm"]["failure_message"] == "top fail"
-    assert props["mllm"]["max_history"] == 9
-    assert props["mllm"]["url"] == "wss://openai.example.com/realtime"
-    assert isinstance(props["token"], str) and props["token"]
diff --git a/tests/agentkit/test_agent_session.py b/tests/agentkit/test_agent_session.py
deleted file mode 100644
index f91722a..0000000
--- a/tests/agentkit/test_agent_session.py
+++ /dev/null
@@ -1,171 +0,0 @@
-import asyncio
-from unittest import mock
-
-import pytest
-
-from agora_agent.agentkit import Agent
-from agora_agent.agentkit.vendors import AkoolAvatar, DeepgramSTT, ElevenLabsTTS, OpenAI
-from agora_agent.core.api_error import ApiError
-from tests.agentkit.helpers import DummyAsyncClient, DummyClient, dump_model
-
-
-def build_standard_agent():
-    return (
-        Agent(name="assistant", instructions="be helpful")
-        .with_stt(DeepgramSTT(api_key="dg", model="nova-3"))
-        .with_llm(OpenAI(api_key="key", model="gpt-4o-mini"))
-        .with_tts(ElevenLabsTTS(key="tts", model_id="model", voice_id="voice", sample_rate=24000))
-    )
-
-
-def test_start_accepts_preset_arrays_and_normalizes_them():
-    client = DummyClient()
-    session = Agent(name="assistant").create_session(
-        client,
-        channel="room",
-        agent_uid="1",
-        remote_uids=["2"],
-        preset=["deepgram_nova_3", "openai_gpt_5_mini", "openai_tts_1"],
-    )
-    session.start()
-    _, kwargs = client.agents.start_calls[0]
-    assert kwargs["preset"] == "deepgram_nova_3,openai_gpt_5_mini,openai_tts_1"
-
-
-def test_session_methods_enforce_state_and_id_guards():
-    session = build_standard_agent().create_session(DummyClient(), channel="room", agent_uid="1", remote_uids=["2"])
-
-    for method_name in ["stop", "interrupt", "update", "say"]:
-        with pytest.raises(RuntimeError):
-            if method_name == "say":
-                getattr(session, method_name)("hello")
-            elif method_name == "update":
-                getattr(session, method_name)({})
-            else:
-                getattr(session, method_name)()
-
-    with pytest.raises(RuntimeError):
-        session.get_history()
-    with pytest.raises(RuntimeError):
-        session.get_info()
-    with pytest.raises(RuntimeError):
-        session.get_turns()
-
-
-def test_app_credentials_mode_adds_auth_headers_and_exposes_getters_and_raw_client():
-    client = DummyClient(auth_mode="app-credentials")
-    session = build_standard_agent().create_session(client, channel="room", agent_uid="1", remote_uids=["2"])
-
-    headers = session._convo_ai_headers()
-    assert headers is not None
-    assert headers["Authorization"].startswith("agora token=")
-    assert session.app_id == "app-id"
-    assert session.agent.name == "assistant"
-    assert session.raw is client.agents
-
-
-def test_event_handlers_can_be_added_removed_and_warning_path_exercised():
-    client = DummyClient()
-    session = build_standard_agent().create_session(client, channel="room", agent_uid="1", remote_uids=["2"])
-    received = []
-
-    def started(payload):
-        received.append(payload)
-
-    def failing(_payload):
-        raise RuntimeError("boom")
-
-    session.on("started", started)
-    session.on("started", failing)
-
-    with pytest.warns(UserWarning):
-        session.start()
-
-    assert received == [{"agent_id": "agent-1"}]
-    session.off("started", started)
-    session._emit("started", {"agent_id": "agent-2"})
-    assert received == [{"agent_id": "agent-1"}]
-
-
-def test_running_session_methods_call_underlying_client_helpers():
-    client = DummyClient()
-    session = build_standard_agent().create_session(client, channel="room", agent_uid="1", remote_uids=["2"])
-    session.start()
-    session.say("hello", priority="APPEND", interruptable=True)
-    session.interrupt()
-    session.update({"greeting_message": "updated"})
-    assert session.get_history() == {"contents": []}
-    assert session.get_info() == {"agent_id": "agent-1"}
-    assert session.get_turns() == {"turns": [{"agent_id": "agent-1"}]}
-    session.stop()
-
-    assert client.agents.speak_calls
-    assert client.agents.interrupt_calls
-    assert client.agents.update_calls
-    assert client.agents.history_calls
-    assert client.agents.get_calls
-    assert client.agents.turn_calls
-    assert client.agents.stop_calls
-
-
-def test_start_sets_status_to_error_and_emits_error_event_on_failure():
-    client = DummyClient()
-    client.agents.start_error = RuntimeError("start failed")
-    session = build_standard_agent().create_session(client, channel="room", agent_uid="1", remote_uids=["2"])
-    errors = []
-    session.on("error", errors.append)
-
-    with pytest.raises(RuntimeError, match="start failed"):
-        session.start()
-
-    assert session.status == "error"
-    assert len(errors) == 1
-
-
-def test_stop_swallows_404_and_non_404_moves_to_error():
-    client = DummyClient()
-    session = build_standard_agent().create_session(client, channel="room", agent_uid="1", remote_uids=["2"])
-    session.start()
-    client.agents.stop_error = ApiError(status_code=404)
-    session.stop()
-    assert session.status == "stopped"
-
-    client2 = DummyClient()
-    session2 = build_standard_agent().create_session(client2, channel="room", agent_uid="1", remote_uids=["2"])
-    session2.start()
-    client2.agents.stop_error = ApiError(status_code=500)
-    with pytest.raises(ApiError):
-        session2.stop()
-    assert session2.status == "error"
-
-
-def test_avatar_validation_warning_branch_and_async_session_methods():
-    agent = (
-        Agent(name="avatar")
-        .with_llm(OpenAI(api_key="key", model="gpt-4o-mini"))
-        .with_tts(ElevenLabsTTS(key="tts", model_id="model", voice_id="voice"))
-        .with_avatar(AkoolAvatar(api_key="akool", avatar_id="avatar-1"))
-    )
-    client = DummyClient()
-    session = agent.create_session(client, channel="room", agent_uid="1", remote_uids=["2"])
-    with pytest.warns(UserWarning):
-        session._validate_avatar_config()
-
-    async def run_async_case():
-        async_client = DummyAsyncClient()
-        async_session = build_standard_agent().create_async_session(
-            async_client,
-            channel="room",
-            agent_uid="1",
-            remote_uids=["2"],
-        )
-        await async_session.start()
-        await async_session.say("hello")
-        await async_session.interrupt()
-        await async_session.update({"greeting_message": "updated"})
-        assert await async_session.get_history() == {"contents": []}
-        assert await async_session.get_info() == {"agent_id": "agent-1"}
-        assert await async_session.get_turns() == {"turns": [{"agent_id": "agent-1"}]}
-        await async_session.stop()
-
-    asyncio.run(run_async_case())
diff --git a/tests/agentkit/test_agentkit_parity.py b/tests/agentkit/test_agentkit_parity.py
deleted file mode 100644
index a486449..0000000
--- a/tests/agentkit/test_agentkit_parity.py
+++ /dev/null
@@ -1,187 +0,0 @@
-from typing import Any, Dict, List, Tuple
-from types import SimpleNamespace
-import unittest
-
-from agora_agent.agentkit import (
-    Agent,
-    AnamAvatar,
-    GeminiLive,
-    LiveAvatarAvatar,
-    OpenAI,
-    OpenAITTS,
-    validate_avatar_config,
-    validate_tts_sample_rate,
-)
-from agora_agent.agentkit.vendors import DeepgramSTT
-
-
-class DummyAgents:
-    def __init__(self) -> None:
-        self.start_calls: List[Tuple[Any, Dict[str, Any]]] = []
-        self.turn_calls: List[Tuple[Any, Any, Any]] = []
-
-    def start(self, app_id, **kwargs):
-        self.start_calls.append((app_id, kwargs))
-        return SimpleNamespace(agent_id="agent-1")
-
-    def get_turns(self, app_id, agent_id, request_options=None):
-        self.turn_calls.append((app_id, agent_id, request_options))
-        return {"turns": [{"agent_id": agent_id}]}
-
-    def stop(self, *args, **kwargs):
-        return None
-
-    def speak(self, *args, **kwargs):
-        return None
-
-    def interrupt(self, *args, **kwargs):
-        return None
-
-    def update(self, *args, **kwargs):
-        return None
-
-    def get_history(self, *args, **kwargs):
-        return {"contents": []}
-
-    def get(self, *args, **kwargs):
-        return {"agent_id": "agent-1"}
-
-
-class DummyClient:
-    def __init__(self) -> None:
-        self.app_id = "app-id"
-        self.app_certificate = "app-cert"
-        self.auth_mode = "basic"
-        self.agents = DummyAgents()
-
-
-def dump_properties(properties):
-    if hasattr(properties, "model_dump"):
-        return properties.model_dump(exclude_none=True)
-    return properties
-
-
-class AgentKitParityTests(unittest.TestCase):
-    def test_start_supports_preset_and_pipeline_without_explicit_llm_or_tts(self):
-        client = DummyClient()
-        agent = Agent(name="preset-agent", instructions="Use preset defaults.")
-
-        session = agent.create_session(
-            client,
-            channel="room-1",
-            agent_uid="1",
-            remote_uids=["2"],
-            preset="deepgram_nova_3,openai_gpt_4o_mini,openai_tts_1",
-            pipeline_id="pipeline_123",
-        )
-
-        agent_id = session.start()
-
-        self.assertEqual(agent_id, "agent-1")
-        _, kwargs = client.agents.start_calls[0]
-        self.assertEqual(kwargs["preset"], "deepgram_nova_3,openai_gpt_4o_mini,openai_tts_1")
-        self.assertEqual(kwargs["pipeline_id"], "pipeline_123")
-        dumped = dump_properties(kwargs["properties"])
-        self.assertEqual(dumped["channel"], "room-1")
-        self.assertEqual(dumped["agent_rtc_uid"], "1")
-        self.assertNotIn("llm", dumped)
-        self.assertNotIn("tts", dumped)
-
-    def test_start_infers_reseller_presets_and_strips_credential_fields(self):
-        client = DummyClient()
-        agent = (
-            Agent(name="auto-preset", instructions="Use reseller defaults.")
-            .with_stt(DeepgramSTT(model="nova-3"))
-            .with_llm(OpenAI(model="gpt-5-mini"))
-            .with_tts(OpenAITTS(voice="alloy"))
-        )
-
-        session = agent.create_session(
-            client,
-            channel="room-2",
-            agent_uid="1",
-            remote_uids=["2"],
-        )
-
-        session.start()
-
-        _, kwargs = client.agents.start_calls[0]
-        self.assertEqual(kwargs["preset"], "deepgram_nova_3,openai_gpt_5_mini,openai_tts_1")
-        dumped = dump_properties(kwargs["properties"])
-        self.assertFalse(dumped["asr"].get("params"))
-        self.assertEqual(
-            dumped["llm"]["system_messages"],
-            [{"role": "system", "content": "Use reseller defaults."}],
-        )
-        self.assertEqual(dumped["llm"]["input_modalities"], ["text"])
-        self.assertFalse(dumped["llm"].get("api_key"))
-        self.assertEqual(dumped["tts"].get("params"), {"voice": "alloy"})
-
-    def test_session_get_turns_proxies_to_agents_client(self):
-        client = DummyClient()
-        session = Agent(name="assistant").create_session(
-            client,
-            channel="room-3",
-            agent_uid="1",
-            remote_uids=["2"],
-            preset="deepgram_nova_3,openai_gpt_4o_mini,openai_tts_1",
-        )
-        session.start()
-
-        turns = session.get_turns()
-
-        self.assertEqual(turns, {"turns": [{"agent_id": "agent-1"}]})
-        self.assertEqual(client.agents.turn_calls, [("app-id", "agent-1", None)])
-
-    def test_gemini_live_matches_low_level_shape(self):
-        config = GeminiLive(
-            api_key="google-key",
-            model="gemini-live-2.5-flash",
-            url="wss://generativelanguage.googleapis.com/ws",
-            instructions="You are concise.",
-            voice="Aoede",
-            greeting_message="Hello",
-            predefined_tools=["_publish_message"],
-            failure_message="Please try again.",
-            max_history=8,
-            additional_params={"temperature": 0.2},
-            messages=[{"role": "user", "content": "Hi"}],
-        ).to_config()
-
-        self.assertEqual(
-            config,
-            {
-                "vendor": "gemini",
-                "style": "openai",
-                "api_key": "google-key",
-                "url": "wss://generativelanguage.googleapis.com/ws",
-                "params": {
-                    "temperature": 0.2,
-                    "model": "gemini-live-2.5-flash",
-                    "instructions": "You are concise.",
-                    "voice": "Aoede",
-                },
-                "messages": [{"role": "user", "content": "Hi"}],
-                "greeting_message": "Hello",
-                "predefined_tools": ["_publish_message"],
-                "failure_message": "Please try again.",
-                "max_history": 8,
-            },
-        )
-
-    def test_liveavatar_and_anam_avatar_support_matches_typescript(self):
-        liveavatar = LiveAvatarAvatar(api_key="live-key", quality="high", agora_uid="42").to_config()
-        validate_avatar_config(liveavatar)
-        with self.assertRaisesRegex(ValueError, "LiveAvatar"):
-            validate_tts_sample_rate(liveavatar, 16000)
-
-        anam = AnamAvatar(api_key="anam-key", persona_id="persona-1").to_config()
-        validate_avatar_config(anam)
-        agent = Agent().with_tts(OpenAITTS(api_key="openai-key", voice="alloy")).with_avatar(
-            AnamAvatar(api_key="anam-key", persona_id="persona-1")
-        )
-        self.assertEqual(agent.avatar, anam)
-
-
-if __name__ == "__main__":
-    unittest.main()
diff --git a/tests/agentkit/test_constants.py b/tests/agentkit/test_constants.py
deleted file mode 100644
index 10a12b6..0000000
--- a/tests/agentkit/test_constants.py
+++ /dev/null
@@ -1,26 +0,0 @@
-from agora_agent.agentkit import (
-    DataChannel,
-    FillerWordsSelectionRule,
-    GeofenceArea,
-    GeofenceExcludeArea,
-    SalModeValues,
-    SilenceActionValues,
-    TurnDetectionTypeValues,
-)
-
-
-def test_constants_match_expected_values():
-    assert DataChannel.RTM == "rtm"
-    assert DataChannel.DATASTREAM == "datastream"
-    assert SilenceActionValues.SPEAK == "speak"
-    assert SilenceActionValues.THINK == "think"
-    assert SalModeValues.LOCKING == "locking"
-    assert SalModeValues.RECOGNITION == "recognition"
-    assert GeofenceArea.GLOBAL == "GLOBAL"
-    assert GeofenceArea.NORTH_AMERICA == "NORTH_AMERICA"
-    assert GeofenceExcludeArea.JAPAN == "JAPAN"
-    assert FillerWordsSelectionRule.SHUFFLE == "shuffle"
-    assert FillerWordsSelectionRule.ROUND_ROBIN == "round_robin"
-    assert TurnDetectionTypeValues.AGORA_VAD == "agora_vad"
-    assert TurnDetectionTypeValues.SERVER_VAD == "server_vad"
-    assert TurnDetectionTypeValues.SEMANTIC_VAD == "semantic_vad"
diff --git a/tests/agentkit/test_presets.py b/tests/agentkit/test_presets.py
deleted file mode 100644
index 51f38eb..0000000
--- a/tests/agentkit/test_presets.py
+++ /dev/null
@@ -1,86 +0,0 @@
-from agora_agent.agentkit import AgentPresets, normalize_preset_input
-from agora_agent.agentkit.presets import resolve_session_presets
-
-
-def test_preset_values_match_expected_strings():
-    assert AgentPresets.asr.deepgram_nova_2 == "deepgram_nova_2"
-    assert AgentPresets.asr.deepgram_nova_3 == "deepgram_nova_3"
-    assert AgentPresets.llm.openai_gpt_4o_mini == "openai_gpt_4o_mini"
-    assert AgentPresets.llm.openai_gpt_4_1_mini == "openai_gpt_4_1_mini"
-    assert AgentPresets.llm.openai_gpt_5_nano == "openai_gpt_5_nano"
-    assert AgentPresets.llm.openai_gpt_5_mini == "openai_gpt_5_mini"
-    assert AgentPresets.tts.minimax_speech_2_6_turbo == "minimax_speech_2_6_turbo"
-    assert AgentPresets.tts.minimax_speech_2_8_turbo == "minimax_speech_2_8_turbo"
-    assert AgentPresets.tts.openai_tts_1 == "openai_tts_1"
-
-
-def test_normalize_preset_input_variants():
-    assert normalize_preset_input(None) is None
-    assert normalize_preset_input("deepgram_nova_3") == "deepgram_nova_3"
-    assert (
-        normalize_preset_input(["deepgram_nova_3", "openai_gpt_5_mini"])
-        == "deepgram_nova_3,openai_gpt_5_mini"
-    )
-    assert (
-        normalize_preset_input(" deepgram_nova_3, , openai_gpt_5_mini ")
-        == "deepgram_nova_3,openai_gpt_5_mini"
-    )
-
-
-def test_resolve_session_presets_returns_none_when_nothing_inferrable():
-    preset, properties = resolve_session_presets(None, {"llm": {"vendor": "custom"}})
-    assert preset is None
-    assert properties["llm"] == {"vendor": "custom"}
-    assert properties["asr"] is None
-    assert properties["tts"] is None
-
-
-def test_resolve_session_presets_inferrs_and_strips_fields():
-    preset, properties = resolve_session_presets(
-        None,
-        {
-            "asr": {"vendor": "deepgram", "params": {"model": "nova-3"}},
-            "llm": {
-                "vendor": "openai",
-                "url": "https://api.openai.com/v1/chat/completions",
-                "params": {"model": "gpt-5-mini"},
-            },
-            "tts": {"vendor": "openai", "params": {"model": "tts-1", "voice": "alloy"}},
-        },
-    )
-    assert preset == "deepgram_nova_3,openai_gpt_5_mini,openai_tts_1"
-    assert properties["asr"] == {"vendor": "deepgram"}
-    assert properties["llm"] == {"vendor": "openai"}
-    assert properties["tts"] == {"vendor": "openai", "params": {"voice": "alloy"}}
-
-
-def test_resolve_session_presets_minimax_and_explicit_precedence():
-    preset, properties = resolve_session_presets(
-        "deepgram_nova_2",
-        {
-            "asr": {"vendor": "deepgram", "params": {"model": "nova-3"}},
-            "tts": {"vendor": "minimax", "params": {"model": "speech-2.8-turbo"}},
-        },
-    )
-    assert preset == "deepgram_nova_2,minimax_speech_2_8_turbo"
-    assert properties["asr"]["params"] == {"model": "nova-3"}
-    assert properties["tts"] == {"vendor": "minimax"}
-
-
-def test_resolve_session_presets_skips_inference_when_credentials_or_nonstandard_values_present():
-    assert resolve_session_presets(
-        None, {"asr": {"vendor": "deepgram", "params": {"model": "nova-3", "api_key": "key"}}}
-    )[0] is None
-    assert resolve_session_presets(
-        None,
-        {
-            "llm": {
-                "vendor": "openai",
-                "url": "https://example.com/chat/completions",
-                "params": {"model": "gpt-5-mini"},
-            }
-        },
-    )[0] is None
-    assert resolve_session_presets(
-        None, {"tts": {"vendor": "minimax", "params": {"model": "speech-2.8-turbo", "key": "secret"}}}
-    )[0] is None
diff --git a/tests/agentkit/test_token.py b/tests/agentkit/test_token.py
deleted file mode 100644
index cb4b37a..0000000
--- a/tests/agentkit/test_token.py
+++ /dev/null
@@ -1,69 +0,0 @@
-from unittest import mock
-
-import pytest
-
-from agora_agent.agentkit.token import (
-    MAX_EXPIRY_SECONDS,
-    expires_in_hours,
-    expires_in_minutes,
-    generate_convo_ai_token,
-    generate_rtc_token,
-)
-
-
-def test_expires_in_helpers_validate_and_cap_values():
-    with pytest.raises(ValueError):
-        expires_in_hours(0)
-    with pytest.raises(ValueError):
-        expires_in_minutes(-1)
-
-    with pytest.warns(UserWarning):
-        assert expires_in_hours(30) == MAX_EXPIRY_SECONDS
-    with pytest.warns(UserWarning):
-        assert expires_in_minutes(60 * 30) == MAX_EXPIRY_SECONDS
-
-    assert expires_in_hours(1.5) == 5400
-    assert expires_in_minutes(2.5) == 150
-
-
-def test_token_generators_return_non_empty_strings():
-    rtc = generate_rtc_token(
-        app_id="a" * 32,
-        app_certificate="b" * 32,
-        channel="demo",
-        uid=1,
-    )
-    convo = generate_convo_ai_token(
-        app_id="a" * 32,
-        app_certificate="b" * 32,
-        channel_name="demo",
-        account="1",
-    )
-    assert isinstance(rtc, str) and rtc
-    assert isinstance(convo, str) and convo
-
-
-def test_generate_convo_ai_token_uses_builder_when_available_and_defaults_privilege_expire():
-    fake_builder = mock.Mock()
-    fake_builder.buildTokenWithRtm.return_value = "token-123"
-
-    with mock.patch.dict("sys.modules", {"agora_token_builder": mock.Mock(RtcTokenBuilder=fake_builder)}):
-        token = generate_convo_ai_token(
-            app_id="app-id",
-            app_certificate="app-cert",
-            channel_name="demo",
-            account="1",
-            token_expire=120,
-            privilege_expire=0,
-        )
-
-    assert token == "token-123"
-    fake_builder.buildTokenWithRtm.assert_called_once_with(
-        "app-id",
-        "app-cert",
-        "demo",
-        "1",
-        1,
-        120,
-        0,
-    )
diff --git a/tests/agentkit/test_vendors.py b/tests/agentkit/test_vendors.py
deleted file mode 100644
index 70a923e..0000000
--- a/tests/agentkit/test_vendors.py
+++ /dev/null
@@ -1,160 +0,0 @@
-import pytest
-
-from agora_agent.agentkit import (
-    AnamAvatar,
-    GeminiLive,
-    LiveAvatarAvatar,
-    validate_avatar_config,
-    validate_tts_sample_rate,
-)
-from agora_agent.agentkit.vendors import (
-    AmazonSTT,
-    AmazonTTS,
-    Anthropic,
-    AresSTT,
-    AssemblyAISTT,
-    AzureOpenAI,
-    CartesiaTTS,
-    DeepgramSTT,
-    ElevenLabsTTS,
-    FishAudioTTS,
-    Gemini,
-    GoogleSTT,
-    GoogleTTS,
-    HeyGenAvatar,
-    HumeAITTS,
-    MicrosoftSTT,
-    MicrosoftTTS,
-    MiniMaxTTS,
-    MurfTTS,
-    OpenAI,
-    OpenAIRealtime,
-    OpenAISTT,
-    OpenAITTS,
-    RimeTTS,
-    SarvamSTT,
-    SarvamTTS,
-    SpeechmaticsSTT,
-    VertexAI,
-)
-
-
-def test_llm_vendor_mappings_cover_core_shapes_and_defaults():
-    assert OpenAI(api_key="key", model="gpt-4o-mini").to_config()["url"] == "https://api.openai.com/v1/chat/completions"
-    assert "api_key" not in OpenAI(model="gpt-5-mini").to_config()
-    assert "params" not in AzureOpenAI(
-        api_key="key",
-        endpoint="https://azure.example.com",
-        deployment_name="deploy",
-    ).to_config()
-    anthropic = Anthropic(api_key="key", model="claude", temperature=0.3, top_p=0.7).to_config()
-    assert anthropic["params"]["temperature"] == 0.3
-    assert anthropic["params"]["top_p"] == 0.7
-    gemini = Gemini(api_key="key", model="gemini", temperature=0.2, top_p=0.8, top_k=10).to_config()
-    assert gemini["style"] == "gemini"
-    assert gemini["params"]["top_k"] == 10
-
-
-def test_mllm_vendor_mappings_cover_optional_branches():
-    realtime = OpenAIRealtime(
-        api_key="key",
-        url="wss://openai.example.com/realtime",
-        predefined_tools=["_publish_message"],
-        failure_message="Retry",
-        max_history=3,
-    ).to_config()
-    assert realtime == {
-        "vendor": "openai",
-        "style": "openai",
-        "api_key": "key",
-        "url": "wss://openai.example.com/realtime",
-        "predefined_tools": ["_publish_message"],
-        "failure_message": "Retry",
-        "max_history": 3,
-    }
-
-    vertex = VertexAI(
-        model="gemini-live",
-        url="wss://vertex.example.com/realtime",
-        project_id="project",
-        location="us-central1",
-        adc_credentials_string="creds",
-        additional_params={"temperature": 0.2},
-        predefined_tools=["_publish_message"],
-        failure_message="Try again",
-        max_history=5,
-    ).to_config()
-    assert vertex["vendor"] == "vertexai"
-    assert vertex["url"] == "wss://vertex.example.com/realtime"
-    assert vertex["params"]["temperature"] == 0.2
-    assert vertex["predefined_tools"] == ["_publish_message"]
-    assert vertex["failure_message"] == "Try again"
-    assert vertex["max_history"] == 5
-
-    gemini_live = GeminiLive(
-        api_key="key",
-        model="gemini-live",
-        url="wss://gemini.example.com/realtime",
-        voice="Aoede",
-        predefined_tools=["_publish_message"],
-        failure_message="Please try again.",
-        max_history=8,
-    ).to_config()
-    assert gemini_live["vendor"] == "gemini"
-    assert gemini_live["url"] == "wss://gemini.example.com/realtime"
-    assert gemini_live["params"]["voice"] == "Aoede"
-    assert gemini_live["predefined_tools"] == ["_publish_message"]
-    assert gemini_live["failure_message"] == "Please try again."
-    assert gemini_live["max_history"] == 8
-
-
-def test_stt_vendor_mappings_cover_all_wrappers():
-    assert SpeechmaticsSTT(api_key="key", language="en").to_config()["vendor"] == "speechmatics"
-    assert DeepgramSTT(api_key="key", model="nova-3", smart_format=True, punctuation=True).to_config()["params"][
-        "smart_format"
-    ]
-    assert MicrosoftSTT(key="key", region="eastus").to_config()["vendor"] == "microsoft"
-    assert OpenAISTT(api_key="key", model="whisper-1").to_config()["vendor"] == "openai"
-    assert GoogleSTT(api_key="key", language="en-US").to_config()["vendor"] == "google"
-    assert AmazonSTT(access_key="a", secret_key="b", region="us-east-1").to_config()["vendor"] == "amazon"
-    assert AssemblyAISTT(api_key="key").to_config()["vendor"] == "assemblyai"
-    assert AresSTT(language="en").to_config()["vendor"] == "ares"
-    assert SarvamSTT(api_key="key", language="en").to_config()["vendor"] == "sarvam"
-
-
-def test_tts_vendor_mappings_cover_all_wrappers_and_skip_patterns():
-    assert ElevenLabsTTS(key="key", model_id="model", voice_id="voice", skip_patterns=[1]).to_config()["skip_patterns"] == [1]
-    assert MicrosoftTTS(key="key", region="eastus", voice_name="voice").to_config()["vendor"] == "microsoft"
-    assert OpenAITTS(voice="alloy").to_config()["params"] == {"voice": "alloy"}
-    assert CartesiaTTS(api_key="key", voice_id="voice").to_config()["params"]["voice"]["id"] == "voice"
-    assert GoogleTTS(key="key", voice_name="voice").to_config()["vendor"] == "google"
-    assert AmazonTTS(access_key="a", secret_key="b", region="us-east-1", voice_id="voice").to_config()["vendor"] == "amazon"
-    assert HumeAITTS(key="key").to_config()["vendor"] == "humeai"
-    assert RimeTTS(key="key", speaker="speaker").to_config()["vendor"] == "rime"
-    assert FishAudioTTS(key="key", reference_id="ref").to_config()["vendor"] == "fishaudio"
-    assert MiniMaxTTS(model="speech-2.8-turbo").to_config()["params"] == {"model": "speech-2.8-turbo"}
-    assert SarvamTTS(key="key", speaker="speaker", target_language_code="en-IN").to_config()["vendor"] == "sarvam"
-    assert MurfTTS(key="key", voice_id="voice").to_config()["vendor"] == "murf"
-
-
-def test_avatar_vendor_mappings_and_validators_cover_failure_branches():
-    with pytest.raises(ValueError, match="quality"):
-        HeyGenAvatar(api_key="key", quality="bad", agora_uid="1")
-
-    liveavatar = LiveAvatarAvatar(
-        api_key="key",
-        quality="high",
-        agora_uid="1",
-        avatar_id="avatar",
-        disable_idle_timeout=True,
-        activity_idle_timeout=30,
-    ).to_config()
-    assert liveavatar["vendor"] == "liveavatar"
-    validate_avatar_config(liveavatar)
-
-    anam = AnamAvatar(api_key="key", persona_id="persona").to_config()
-    assert anam["vendor"] == "anam"
-    validate_avatar_config(anam)
-
-    with pytest.raises(ValueError, match="HeyGen"):
-        validate_tts_sample_rate(HeyGenAvatar(api_key="key", quality="high", agora_uid="1").to_config(), 16000)
diff --git a/tests/custom/test_client.py b/tests/custom/test_client.py
index ab04ce6..77fae36 100644
--- a/tests/custom/test_client.py
+++ b/tests/custom/test_client.py
@@ -1,7 +1,265 @@
-import pytest
+from agora_agent.agentkit.agent import Agent, AdvancedFeatures, InterruptionConfig, MllmTurnDetectionConfig, TurnDetectionConfig
+from agora_agent.agentkit.constants import TurnDetectionTypeValues
+import asyncio
+import warnings
+from agora_agent.agentkit.agent_session import AgentSession, AsyncAgentSession
+from agora_agent.agentkit.vendors import DeepgramTTS, HeyGenAvatar, MicrosoftTTS, OpenAI, OpenAIRealtime
+from agora_agent.agentkit import AgentThinkResponse
+from typing import Any, Dict, List, Tuple
 
 
-# Get started with writing tests with pytest at https://docs.pytest.org
-@pytest.mark.skip(reason="Unimplemented")
-def test_client() -> None:
-    assert True
+class _AgentManagementStub:  # Synchronous test double that records every agent_think() call.
+    def __init__(self) -> None:
+        self.calls: List[Tuple[str, str, Dict[str, Any]]] = []  # one (appid, agent_id, kwargs) tuple per call
+
+    def agent_think(self, appid, agent_id, **kwargs):  # noqa: ANN001
+        self.calls.append((appid, agent_id, kwargs))  # capture the arguments so tests can assert on routing
+        return AgentThinkResponse(agent_id=agent_id, channel="room", start_ts=1)  # canned response echoing agent_id
+
+
+class _ClientStub:  # Minimal synchronous client stand-in handed to AgentSession in these tests.
+    auth_mode = "basic"
+
+    def __init__(self) -> None:
+        self.agents = object()  # opaque placeholder object with no behavior
+        self.agent_management = _AgentManagementStub()  # recording stub the think() test inspects
+
+
+class _AsyncAgentManagementStub:  # Async counterpart of the recording stub; agent_think is a coroutine.
+    def __init__(self) -> None:
+        self.calls: List[Tuple[str, str, Dict[str, Any]]] = []  # one (appid, agent_id, kwargs) tuple per call
+
+    async def agent_think(self, appid, agent_id, **kwargs):  # noqa: ANN001
+        self.calls.append((appid, agent_id, kwargs))  # capture the arguments so tests can assert on routing
+        return AgentThinkResponse(agent_id=agent_id, channel="room", start_ts=1)  # canned response echoing agent_id
+
+
+class _AsyncClientStub:  # Minimal async client stand-in handed to AsyncAgentSession in these tests.
+    auth_mode = "basic"
+
+    def __init__(self) -> None:
+        self.agents = object()  # opaque placeholder object with no behavior
+        self.agent_management = _AsyncAgentManagementStub()  # recording stub the async think() test inspects
+
+
+def test_agentkit_think_routes_to_agent_management() -> None:  # think() must call client.agent_management.agent_think
+    client = _ClientStub()
+    session = AgentSession(
+        client=client,
+        agent=Agent(),
+        app_id="appid",
+        name="agent",
+        channel="room",
+        token="token",
+        agent_uid="1",
+        remote_uids=["2"],
+    )
+    session._status = "running"  # private state is set directly; the session is never started in this test
+    session._agent_id = "agent-1"  # id expected to be passed through to agent_think
+
+    response = session.think("Injected instruction", on_thinking_action="interrupt")
+    assert response.agent_id == "agent-1"
+    assert len(client.agent_management.calls) == 1  # exactly one call reached the stub
+    appid, agent_id, kwargs = client.agent_management.calls[0]  # unpack the single recorded call
+    assert appid == "appid"
+    assert agent_id == "agent-1"
+    assert kwargs["text"] == "Injected instruction"
+    assert kwargs["on_thinking_action"] == "interrupt"
+
+
+def test_async_agentkit_think_routes_to_agent_management() -> None:  # async twin of the sync routing test
+    async def _run() -> None:  # coroutine body; executed by asyncio.run() at the bottom
+        client = _AsyncClientStub()
+        session = AsyncAgentSession(
+            client=client,
+            agent=Agent(),
+            app_id="appid",
+            name="agent",
+            channel="room",
+            token="token",
+            agent_uid="1",
+            remote_uids=["2"],
+        )
+        session._status = "running"  # private state is set directly; the session is never started in this test
+        session._agent_id = "agent-1"  # id expected to be passed through to agent_think
+
+        response = await session.think("Injected instruction", on_thinking_action="interrupt")
+        assert response.agent_id == "agent-1"
+        assert len(client.agent_management.calls) == 1  # exactly one call reached the stub
+        appid, agent_id, kwargs = client.agent_management.calls[0]  # unpack the single recorded call
+        assert appid == "appid"
+        assert agent_id == "agent-1"
+        assert kwargs["text"] == "Injected instruction"
+        assert kwargs["on_thinking_action"] == "interrupt"
+
+    asyncio.run(_run())  # drive the coroutine from a plain sync test function
+
+
+def test_llm_vendor_headers_are_forwarded_to_properties() -> None:  # optional OpenAI fields must reach props.llm
+    agent = Agent().with_llm(
+        OpenAI(
+            api_key="openai-key",
+            model="gpt-4o-mini",
+            headers={"X-Trace-Id": "trace-123"},
+            output_modalities=["text", "audio"],
+            greeting_configs={"mode": "single_first"},  # passed as a dict; read back below via attribute access
+            template_variables={"caller_name": "Ada"},
+        )
+    ).with_tts(MicrosoftTTS(key="tts-key", region="eastus", voice_name="en-US-JennyNeural"))  # NOTE(review): presumably needed because skip_vendor_validation is not passed below — confirm
+
+    props = agent.to_properties(
+        channel="room",
+        token="rtc-token",
+        agent_uid="1",
+        remote_uids=["2"],
+    )
+
+    assert props.llm is not None
+    assert props.llm.headers == {"X-Trace-Id": "trace-123"}
+    assert props.llm.output_modalities == ["text", "audio"]
+    assert props.llm.greeting_configs is not None
+    assert props.llm.greeting_configs.mode == "single_first"  # the input dict is exposed as an object with a .mode attribute
+    assert props.llm.template_variables == {"caller_name": "Ada"}
+
+
+def test_with_turn_detection_forwards_config() -> None:  # with_turn_detection() config must surface on props.turn_detection
+    turn_detection = TurnDetectionConfig(
+        type=TurnDetectionTypeValues.AGORA_VAD,
+        threshold=0.5,
+    )
+
+    props = Agent().with_turn_detection(turn_detection).to_properties(
+        channel="room",
+        token="rtc-token",
+        agent_uid="1",
+        remote_uids=["2"],
+        skip_vendor_validation=True,  # no LLM/TTS/MLLM vendor is configured on this bare Agent
+    )
+
+    assert props.turn_detection == turn_detection  # compared by equality against the object passed in
+
+
+def test_with_interruption_forwards_config() -> None:  # with_interruption() config must surface on props.interruption
+    interruption = InterruptionConfig(
+        enable=False,
+        disabled_config={"strategy": "ignore"},
+    )
+
+    props = Agent().with_interruption(interruption).to_properties(
+        channel="room",
+        token="rtc-token",
+        agent_uid="1",
+        remote_uids=["2"],
+        skip_vendor_validation=True,  # no LLM/TTS/MLLM vendor is configured on this bare Agent
+    )
+
+    assert props.interruption == interruption  # compared by equality against the object passed in
+
+
+def test_mllm_turn_detection_is_forwarded_without_legacy_style() -> None:  # MLLM turn detection forwarded; no "style" key emitted
+    mllm_turn_detection = MllmTurnDetectionConfig(
+        mode="server_vad",
+        server_vad_config={"idle_timeout_ms": 5000},
+    )
+    props = Agent().with_mllm(
+        OpenAIRealtime(api_key="openai-key", turn_detection=mllm_turn_detection)
+    ).to_properties(
+        channel="room",
+        token="rtc-token",
+        agent_uid="1",
+        remote_uids=["2"],
+    )
+
+    assert props.mllm is not None
+    assert props.mllm.vendor == "openai"
+    assert "style" not in props.mllm.dict()  # the serialized MLLM config must not contain a "style" key at all
+    assert props.mllm.turn_detection == mllm_turn_detection
+
+
+def test_with_mllm_sets_mllm_enable_without_legacy_flag() -> None:  # with_mllm() enables via props.mllm.enable only
+    agent = Agent().with_mllm(OpenAIRealtime(api_key="openai-key"))
+
+    props = agent.to_properties(
+        channel="room",
+        token="rtc-token",
+        agent_uid="1",
+        remote_uids=["2"],
+    )
+
+    assert props.mllm is not None
+    assert props.mllm.enable is True
+    assert props.advanced_features is None  # no advanced_features object is created as a side effect
+
+
+def test_with_mllm_removes_deprecated_enable_mllm_from_existing_advanced_features() -> None:  # enable_mllm cleared, other flags kept
+    agent = Agent(
+        advanced_features=AdvancedFeatures(enable_mllm=True, enable_rtm=True)  # one deprecated flag plus one that must survive
+    ).with_mllm(OpenAIRealtime(api_key="openai-key"))
+
+    props = agent.to_properties(
+        channel="room",
+        token="rtc-token",
+        agent_uid="1",
+        remote_uids=["2"],
+    )
+
+    assert props.mllm is not None
+    assert props.mllm.enable is True
+    assert props.advanced_features is not None
+    assert props.advanced_features.enable_mllm is None  # the deprecated flag is reset to None rather than False
+    assert props.advanced_features.enable_rtm is True  # the unrelated flag passes through unchanged
+
+
+def test_with_mllm_drops_advanced_features_when_only_deprecated_enable_mllm_was_set() -> None:  # nothing left => None
+    props = Agent(
+        advanced_features=AdvancedFeatures(enable_mllm=True)  # the deprecated flag is the only field set
+    ).with_mllm(OpenAIRealtime(api_key="openai-key")).to_properties(
+        channel="room",
+        token="rtc-token",
+        agent_uid="1",
+        remote_uids=["2"],
+    )
+
+    assert props.mllm is not None
+    assert props.mllm.enable is True
+    assert props.advanced_features is None  # the whole object is dropped once its only flag is removed
+
+
+def test_with_tools_sets_enable_tools() -> None:  # with_tools() must set advanced_features.enable_tools
+    props = Agent().with_tools().to_properties(
+        channel="room",
+        token="rtc-token",
+        agent_uid="1",
+        remote_uids=["2"],
+        skip_vendor_validation=True,  # no LLM/TTS/MLLM vendor is configured on this bare Agent
+    )
+
+    assert props.advanced_features is not None
+    assert props.advanced_features.enable_tools is True
+
+
+def test_heygen_avatar_emits_deprecation_warning() -> None:  # constructing HeyGenAvatar must emit a deprecation message
+    with warnings.catch_warnings(record=True) as caught:  # record=True collects warnings into the `caught` list
+        warnings.simplefilter("always")  # ensure the warning is not hidden by default or once-only filters
+        HeyGenAvatar(api_key="heygen-key", quality="high", agora_uid="42")
+
+    assert any("HeyGenAvatar is deprecated" in str(warning.message) for warning in caught)  # matches on message text only, not category
+
+
+def test_deepgram_tts_vendor_config() -> None:  # DeepgramTTS.to_config() flattens named fields and extra params together
+    tts = DeepgramTTS(
+        api_key="deepgram-key",
+        model="aura-2-thalia-en",
+        base_url="wss://api.deepgram.com/v1/speak",
+        sample_rate=24000,
+        params={"encoding": "linear16"},  # extra params; expected below to be merged next to the named fields
+    ).to_config()
+
+    assert tts["vendor"] == "deepgram"
+    assert tts["params"] == {  # exact-match comparison: no additional keys are allowed
+        "api_key": "deepgram-key",
+        "model": "aura-2-thalia-en",
+        "base_url": "wss://api.deepgram.com/v1/speak",
+        "sample_rate": 24000,
+        "encoding": "linear16",
+    }
diff --git a/tests/custom/test_presets.py b/tests/custom/test_presets.py
new file mode 100644
index 0000000..c05c477
--- /dev/null
+++ b/tests/custom/test_presets.py
@@ -0,0 +1,135 @@
+from agora_agent.agentkit.presets import resolve_session_presets
+
+
+def test_minimax_preset_strips_group_id_and_url_when_no_key() -> None:
+    """With no key in params, the preset is inferred; group_id, url and model are stripped, voice_setting is kept."""
+    properties = {
+        "tts": {
+            "vendor": "minimax",
+            "params": {
+                "group_id": "my-group",
+                "model": "speech-2.6-turbo",
+                "url": "wss://api-uw.minimax.io/ws/v1/t2a_v2",
+                "voice_setting": {"voice_id": "English_captivating_female1"},
+            },
+        }
+    }
+    preset, resolved = resolve_session_presets(None, properties)  # None => no explicit preset supplied
+    assert preset == "minimax_speech_2_6_turbo"
+    params = resolved["tts"]["params"]
+    assert "group_id" not in params
+    assert "url" not in params
+    assert "model" not in params
+    assert params["voice_setting"]["voice_id"] == "English_captivating_female1"  # non-credential settings survive
+
+
+def test_minimax_preset_strips_group_id_and_url_for_28_turbo() -> None:  # same stripping rules for the speech-2.8-turbo model
+    properties = {
+        "tts": {
+            "vendor": "minimax",
+            "params": {
+                "group_id": "org-123",
+                "model": "speech-2.8-turbo",
+                "url": "wss://api.minimax.io/ws/v1/t2a_v2",
+                "voice_setting": {"voice_id": "some-voice"},
+            },
+        }
+    }
+    preset, resolved = resolve_session_presets(None, properties)  # None => no explicit preset supplied
+    assert preset == "minimax_speech_2_8_turbo"
+    params = resolved["tts"]["params"]
+    assert "group_id" not in params
+    assert "url" not in params
+    assert "model" not in params
+
+
+def test_minimax_preset_strips_group_id_and_url_with_underscore_model_name() -> None:  # underscore spelling of the model also resolves
+    properties = {
+        "tts": {
+            "vendor": "minimax",
+            "params": {
+                "group_id": "my-group",
+                "model": "speech_2_6_turbo",
+                "url": "wss://api-uw.minimax.io/ws/v1/t2a_v2",
+            },
+        }
+    }
+    preset, resolved = resolve_session_presets(None, properties)  # None => no explicit preset supplied
+    assert preset == "minimax_speech_2_6_turbo"
+    params = resolved["tts"].get("params") or {}  # tolerate "params" being absent or empty once every key is stripped
+    assert "group_id" not in params
+    assert "url" not in params
+    assert "model" not in params
+
+
+def test_minimax_preset_not_inferred_when_key_present() -> None:
+    """When params carry a user-supplied key, no preset is inferred and key/group_id are left in place."""
+    properties = {
+        "tts": {
+            "vendor": "minimax",
+            "params": {
+                "key": "user-secret",
+                "group_id": "my-group",
+                "model": "speech-2.6-turbo",
+            },
+        }
+    }
+    preset, resolved = resolve_session_presets(None, properties)  # None => no explicit preset supplied
+    assert preset is None
+    params = resolved["tts"]["params"]
+    assert params.get("key") == "user-secret"
+    assert params.get("group_id") == "my-group"
+
+
+def test_minimax_preset_not_inferred_when_explicit_preset_given() -> None:
+    """When an explicit preset string is passed, it is returned as-is and the tts params are not stripped."""
+    properties = {
+        "tts": {
+            "vendor": "minimax",
+            "params": {
+                "group_id": "my-group",
+                "model": "speech-2.6-turbo",
+            },
+        }
+    }
+    preset, resolved = resolve_session_presets("minimax_speech_2_6_turbo", properties)
+    assert preset == "minimax_speech_2_6_turbo"
+    # Explicit preset given: group_id must still be present in the returned params.
+    params = resolved["tts"]["params"]
+    assert params.get("group_id") == "my-group"
+
+
+def test_deepgram_preset_strips_model_and_api_key() -> None:  # Deepgram ASR preset inferred from vendor + model
+    properties = {
+        "asr": {
+            "vendor": "deepgram",
+            "params": {
+                "model": "nova-3",
+                "language": "en-US",
+            },
+        }
+    }
+    preset, resolved = resolve_session_presets(None, properties)  # None => no explicit preset supplied
+    assert preset == "deepgram_nova_3"
+    params = resolved["asr"]["params"]
+    assert "model" not in params
+    assert "api_key" not in params  # NOTE(review): input has no api_key, so this cannot fail — confirm intent
+    assert params.get("language") == "en-US"  # non-preset settings survive
+
+
+def test_openai_llm_preset_strips_model_api_key_and_default_url() -> None:  # OpenAI LLM preset inferred from vendor + model + url
+    properties = {
+        "llm": {
+            "vendor": "openai",
+            "url": "https://api.openai.com/v1/chat/completions",
+            "params": {
+                "model": "gpt-4o-mini",
+            },
+        }
+    }
+    preset, resolved = resolve_session_presets(None, properties)  # None => no explicit preset supplied
+    assert preset == "openai_gpt_4o_mini"
+    llm = resolved["llm"]
+    assert "api_key" not in llm  # NOTE(review): input has no api_key, so this cannot fail — confirm intent
+    assert "url" not in llm
+    assert "model" not in (llm.get("params") or {})  # tolerate "params" being absent or empty after stripping