livekit
diff --git a/livekit-agents/livekit/agents/llm/_provider_format/anthropic.py b/livekit-agents/livekit/agents/llm/_provider_format/anthropic.py
Lines changed: 21 additions & 9 deletions
diff --git a/livekit-agents/livekit/agents/llm/mcp.py b/livekit-agents/livekit/agents/llm/mcp.py
Lines changed: 44 additions & 15 deletions
diff --git a/livekit-agents/livekit/agents/llm/tool_context.py b/livekit-agents/livekit/agents/llm/tool_context.py
Lines changed: 3 additions & 1 deletion
diff --git a/livekit-agents/livekit/agents/stt/fallback_adapter.py b/livekit-agents/livekit/agents/stt/fallback_adapter.py
Lines changed: 7 additions & 0 deletions
diff --git a/livekit-agents/livekit/agents/utils/deprecation.py b/livekit-agents/livekit/agents/utils/deprecation.py
Lines changed: 4 additions & 3 deletions
diff --git a/livekit-agents/livekit/agents/voice/agent_session.py b/livekit-agents/livekit/agents/voice/agent_session.py
Lines changed: 50 additions & 26 deletions
diff --git a/livekit-plugins/livekit-plugins-anthropic/livekit/plugins/anthropic/llm.py b/livekit-plugins/livekit-plugins-anthropic/livekit/plugins/anthropic/llm.py
Lines changed: 6 additions & 1 deletion
diff --git a/livekit-plugins/livekit-plugins-assemblyai/livekit/plugins/assemblyai/stt.py b/livekit-plugins/livekit-plugins-assemblyai/livekit/plugins/assemblyai/stt.py
Lines changed: 4 additions & 0 deletions
@@ -125,18 +125,30 @@ def _to_image_content(image: llm.ImageContent) -> dict[str, Any]:
     }
 
 
-def to_fnc_ctx(tool_ctx: llm.ToolContext) -> list[dict[str, Any]]:
+def to_fnc_ctx(tool_ctx: llm.ToolContext, *, strict: bool = True) -> list[dict[str, Any]]:
     schemas: list[dict[str, Any]] = []
     for tool in tool_ctx.function_tools.values():
         if isinstance(tool, llm.FunctionTool):
-            fnc = llm.utils.build_legacy_openai_schema(tool, internally_tagged=True)
-            schemas.append(
-                {
-                    "name": fnc["name"],
-                    "description": fnc["description"] or "",
-                    "input_schema": fnc["parameters"],
-                }
-            )
+            if strict:
+                fnc = llm.utils.build_strict_openai_schema(tool)
+                function_data = fnc["function"]
+                schemas.append(
+                    {
+                        "name": function_data["name"],
+                        "description": function_data.get("description") or "",
+                        "input_schema": function_data["parameters"],
+                        "strict": True,
+                    }
+                )
+            else:
+                fnc = llm.utils.build_legacy_openai_schema(tool, internally_tagged=True)
+                schemas.append(
+                    {
+                        "name": fnc["name"],
+                        "description": fnc["description"] or "",
+                        "input_schema": fnc["parameters"],
+                    }
+                )
         elif isinstance(tool, llm.RawFunctionTool):
             info = tool.info
             schemas.append(
 
@@ -6,7 +6,7 @@
 import json
 from abc import ABC, abstractmethod
 from collections.abc import Awaitable, Callable
-from contextlib import AbstractAsyncContextManager, AsyncExitStack, asynccontextmanager
+from contextlib import AbstractAsyncContextManager, asynccontextmanager
 from dataclasses import dataclass
 from datetime import timedelta
 from pathlib import Path
@@ -16,6 +16,7 @@
 from anyio.streams.memory import MemoryObjectReceiveStream, MemoryObjectSendStream
 from typing_extensions import Self
 
+from ..log import logger
 from .tool_context import Toolset
 
 try:
@@ -79,7 +80,6 @@ def __init__(
         tool_result_resolver: MCPToolResultResolver | None = None,
     ) -> None:
         self._client: ClientSession | None = None
-        self._exit_stack: AsyncExitStack = AsyncExitStack()
         self._read_timeout = client_session_timeout_seconds
         self._tool_result_resolver: MCPToolResultResolver = (
             tool_result_resolver or _default_tool_result_resolver
@@ -88,6 +88,10 @@ def __init__(
         self._cache_dirty = True
         self._lk_tools: list[MCPTool] | None = None
 
+        self._client_task: asyncio.Task[None] | None = None
+        self._closing_ev = asyncio.Event()
+        self._ready_fut: asyncio.Future[None] | None = None
+
     @property
     def initialized(self) -> bool:
         return self._client is not None
@@ -96,22 +100,45 @@ def invalidate_cache(self) -> None:
         self._cache_dirty = True
 
     async def initialize(self) -> None:
+        if self._client_task and not self._client_task.done():
+            logger.warning("MCPServer is already initializing")
+            if self._ready_fut:
+                await self._ready_fut
+            return
+
+        self._ready_fut = ready_fut = asyncio.Future[None]()
+        self._client_task = asyncio.create_task(
+            self._run_client(ready_fut), name=f"{type(self).__name__}._run_client"
+        )
+        await ready_fut
+
+    async def _run_client(self, ready_fut: asyncio.Future[None]) -> None:
         try:
-            streams = await self._exit_stack.enter_async_context(self.client_streams())
-            receive_stream, send_stream = streams[0], streams[1]
-            self._client = await self._exit_stack.enter_async_context(
-                ClientSession(
+            async with self.client_streams() as streams:
+                receive_stream, send_stream = streams[0], streams[1]
+                async with ClientSession(
                     receive_stream,
                     send_stream,
                     read_timeout_seconds=timedelta(seconds=self._read_timeout)
                     if self._read_timeout
                     else None,
-                )
-            )
-            await self._client.initialize()  # type: ignore[union-attr]
-        except Exception:
-            await self.aclose()
-            raise
+                ) as client:
+                    await client.initialize()
+                    self._client = client
+                    ready_fut.set_result(None)
+
+                    await self._closing_ev.wait()
+        except BaseException as e:
+            if not ready_fut.done():
+                ready_fut.set_exception(e)  # raising from `await initialize()`
+            else:
+                if isinstance(e, Exception):
+                    logger.exception("MCP client connection failed with unexpected error")
+                raise
+        finally:
+            self._client = None
+            self._lk_tools = None
+            self._closing_ev.clear()
 
     async def list_tools(self) -> list[MCPTool]:
         if self._client is None:
@@ -171,11 +198,13 @@ async def _tool_called(raw_arguments: dict[str, Any]) -> Any:
         return function_tool(_tool_called, raw_schema=raw_schema)
 
     async def aclose(self) -> None:
+        self._closing_ev.set()
         try:
-            await self._exit_stack.aclose()
+            if self._client_task:
+                await self._client_task
+                self._client_task = None
         finally:
-            self._client = None
-            self._lk_tools = None
+            self._closing_ev.clear()
 
     @abstractmethod
     def client_streams(
 
@@ -514,7 +514,9 @@ def parse_function_tools(
     def parse_function_tools(self, format: Literal["aws"]) -> list[dict[str, Any]]: ...
 
     @overload
-    def parse_function_tools(self, format: Literal["anthropic"]) -> list[dict[str, Any]]: ...
+    def parse_function_tools(
+        self, format: Literal["anthropic"], *, strict: bool = True
+    ) -> list[dict[str, Any]]: ...
 
     def parse_function_tools(
         self,
 
@@ -68,11 +68,18 @@ def __init__(
                 StreamAdapter(stt=t, vad=vad) if not t.capabilities.streaming else t for t in stt
             ]
 
+        # Use the primary STT's aligned_transcript if all providers support it, since
+        # the SDK only checks truthiness, not the specific granularity.
+        aligned_transcript: Literal["word", "chunk", False] = False
+        if all(t.capabilities.aligned_transcript for t in stt):
+            aligned_transcript = stt[0].capabilities.aligned_transcript
+
         super().__init__(
             capabilities=STTCapabilities(
                 streaming=True,
                 interim_results=all(t.capabilities.interim_results for t in stt),
                 diarization=all(t.capabilities.diarization for t in stt),
+                aligned_transcript=aligned_transcript,
             )
         )
 
 
@@ -4,21 +4,22 @@
 import inspect
 from collections import defaultdict
 from collections.abc import Callable
-from typing import ParamSpec, TypeVar
+from typing import ParamSpec, TypeVar, cast
 
 from ..log import logger
 from ..types import NOT_GIVEN
 from .misc import is_given
 
 _P = ParamSpec("_P")
 _R = TypeVar("_R")
+_F = TypeVar("_F", bound=Callable)
 
 
 def deprecate_params(
     mapping: dict[str, str],
     *,
     target_version: str | None = None,
-) -> Callable[[Callable[_P, _R]], Callable[_P, _R]]:
+) -> Callable[[_F], _F]:
     """
     Args:
         mapping: {old_param: suggestion}
@@ -59,4 +60,4 @@ def wrapper(*args: _P.args, **kwargs: _P.kwargs) -> _R:
 
         return wrapper
 
-    return decorator
+    return cast(Callable[[_F], _F], decorator)
@@ -191,6 +191,21 @@ def __call__(self, frame: rtc.VideoFrame, session: AgentSession) -> bool:
 
 
 class AgentSession(rtc.EventEmitter[EventTypes], Generic[Userdata_T]):
+    @deprecate_params(
+        {
+            "min_endpointing_delay": "Use turn_handling=TurnHandlingOptions(...) instead",
+            "max_endpointing_delay": "Use turn_handling=TurnHandlingOptions(...) instead",
+            "false_interruption_timeout": "Use turn_handling=TurnHandlingOptions(...) instead",
+            "resume_false_interruption": "Use turn_handling=TurnHandlingOptions(...) instead",
+            "allow_interruptions": "Use turn_handling=TurnHandlingOptions(...) instead",
+            "discard_audio_if_uninterruptible": "Use turn_handling=TurnHandlingOptions(...) instead",
+            "min_interruption_duration": "Use turn_handling=TurnHandlingOptions(...) instead",
+            "min_interruption_words": "Use turn_handling=TurnHandlingOptions(...) instead",
+            "turn_detection": "Use turn_handling=TurnHandlingOptions(...) instead",
+            "agent_false_interruption_timeout": "Use turn_handling=TurnHandlingOptions(...) instead",
+        },
+        target_version="v2.0",
+    )
     def __init__(
         self,
         *,
@@ -434,23 +449,6 @@ def __init__(
         # ivr activity
         self._ivr_activity: IVRActivity | None = None
 
-    if not TYPE_CHECKING:
-        __init__ = deprecate_params(
-            {
-                "min_endpointing_delay": "Use turn_handling=TurnHandlingOptions(...) instead",
-                "max_endpointing_delay": "Use turn_handling=TurnHandlingOptions(...) instead",
-                "false_interruption_timeout": "Use turn_handling=TurnHandlingOptions(...) instead",
-                "resume_false_interruption": "Use turn_handling=TurnHandlingOptions(...) instead",
-                "allow_interruptions": "Use turn_handling=TurnHandlingOptions(...) instead",
-                "discard_audio_if_uninterruptible": "Use turn_handling=TurnHandlingOptions(...) instead",
-                "min_interruption_duration": "Use turn_handling=TurnHandlingOptions(...) instead",
-                "min_interruption_words": "Use turn_handling=TurnHandlingOptions(...) instead",
-                "turn_detection": "Use turn_handling=TurnHandlingOptions(...) instead",
-                "agent_false_interruption_timeout": "Use turn_handling=TurnHandlingOptions(...) instead",
-            },
-            target_version="v2.0",
-        )(__init__)
-
     def on(self, event: EventTypes, callback: Callable | None = None) -> Callable:
         if event == "metrics_collected" and callback is not None:
             logger.warning(
@@ -990,31 +988,57 @@ async def aclose(self) -> None:
     def update_options(
         self,
         *,
+        endpointing_opts: NotGivenOr[EndpointingOptions] = NOT_GIVEN,
+        turn_detection: NotGivenOr[TurnDetectionMode | None] = NOT_GIVEN,
+        # deprecated
         min_endpointing_delay: NotGivenOr[float] = NOT_GIVEN,
         max_endpointing_delay: NotGivenOr[float] = NOT_GIVEN,
-        turn_detection: NotGivenOr[TurnDetectionMode | None] = NOT_GIVEN,
     ) -> None:
         """
         Update the options for the agent session.
 
         Args:
-            min_endpointing_delay (NotGivenOr[float], optional): The minimum endpointing delay.
-            max_endpointing_delay (NotGivenOr[float], optional): The maximum endpointing delay.
+            endpointing_opts (NotGivenOr[EndpointingOptions], optional): Endpointing options.
             turn_detection (NotGivenOr[TurnDetectionMode | None], optional): Strategy for deciding
                 when the user has finished speaking. ``None`` reverts to automatic selection.
+            min_endpointing_delay: Deprecated, use ``endpointing_opts`` instead.
+            max_endpointing_delay: Deprecated, use ``endpointing_opts`` instead.
         """
-        if is_given(min_endpointing_delay):
-            self._opts.endpointing["min_delay"] = min_endpointing_delay
-        if is_given(max_endpointing_delay):
-            self._opts.endpointing["max_delay"] = max_endpointing_delay
+        if is_given(min_endpointing_delay) or is_given(max_endpointing_delay):
+            logger.warning(
+                "min_endpointing_delay and max_endpointing_delay are deprecated, "
+                "use endpointing_opts instead"
+            )
+            endpointing_opts = EndpointingOptions(
+                mode=self._opts.endpointing["mode"],
+                min_delay=(
+                    min_endpointing_delay
+                    if is_given(min_endpointing_delay)
+                    else self._opts.endpointing["min_delay"]
+                ),
+                max_delay=(
+                    max_endpointing_delay
+                    if is_given(max_endpointing_delay)
+                    else self._opts.endpointing["max_delay"]
+                ),
+            )
+
+        if is_given(endpointing_opts):
+            if (mode := endpointing_opts.get("mode")) is not None:
+                self._opts.endpointing["mode"] = mode
+            if (min_delay := endpointing_opts.get("min_delay")) is not None:
+                self._opts.endpointing["min_delay"] = min_delay
+            if (max_delay := endpointing_opts.get("max_delay")) is not None:
+                self._opts.endpointing["max_delay"] = max_delay
 
         if is_given(turn_detection):
             self._turn_detection = turn_detection
 
         if self._activity is not None:
             self._activity.update_options(
-                min_endpointing_delay=min_endpointing_delay,
-                max_endpointing_delay=max_endpointing_delay,
+                endpointing_opts=(
+                    self._opts.endpointing if is_given(endpointing_opts) else NOT_GIVEN
+                ),
                 turn_detection=turn_detection,
             )
 
 
@@ -56,6 +56,7 @@ class _LLMOptions:
     caching: NotGivenOr[Literal["ephemeral"]]
     top_k: NotGivenOr[int]
     max_tokens: NotGivenOr[int]
+    strict_tool_schema: bool
     """If set to "ephemeral", the system prompt, tools, and chat history will be cached."""
 
 
@@ -74,6 +75,7 @@ def __init__(
         parallel_tool_calls: NotGivenOr[bool] = NOT_GIVEN,
         tool_choice: NotGivenOr[ToolChoice] = NOT_GIVEN,
         caching: NotGivenOr[Literal["ephemeral"]] = NOT_GIVEN,
+        _strict_tool_schema: bool = True,
     ) -> None:
         """
         Create a new instance of Anthropic LLM.
@@ -103,6 +105,7 @@ def __init__(
             caching=caching,
             top_k=top_k,
             max_tokens=max_tokens,
+            strict_tool_schema=_strict_tool_schema,
         )
         anthropic_api_key = api_key if is_given(api_key) else os.environ.get("ANTHROPIC_API_KEY")
         if not anthropic_api_key:
@@ -164,7 +167,9 @@ def chat(
             from .tools import AnthropicTool
 
             tool_ctx = llm.ToolContext(tools)
-            tool_schemas = tool_ctx.parse_function_tools("anthropic")
+            tool_schemas = tool_ctx.parse_function_tools(
+                "anthropic", strict=self._opts.strict_tool_schema
+            )
 
             for tool in tool_ctx.provider_tools:
                 if isinstance(tool, AnthropicTool):
 
@@ -62,6 +62,7 @@ class STTOptions:
     vad_threshold: NotGivenOr[float] = NOT_GIVEN
     speaker_labels: NotGivenOr[bool] = NOT_GIVEN
     max_speakers: NotGivenOr[int] = NOT_GIVEN
+    domain: NotGivenOr[str] = NOT_GIVEN
 
 
 class STT(stt.STT):
@@ -87,6 +88,7 @@ def __init__(
         vad_threshold: NotGivenOr[float] = NOT_GIVEN,
         speaker_labels: NotGivenOr[bool] = NOT_GIVEN,
         max_speakers: NotGivenOr[int] = NOT_GIVEN,
+        domain: NotGivenOr[str] = NOT_GIVEN,
         http_session: aiohttp.ClientSession | None = None,
         buffer_size_seconds: float = 0.05,
         base_url: str = "wss://streaming.assemblyai.com",
@@ -161,6 +163,7 @@ def __init__(
             vad_threshold=vad_threshold,
             speaker_labels=speaker_labels,
             max_speakers=max_speakers,
+            domain=domain,
         )
         self._session = http_session
         self._streams = weakref.WeakSet[SpeechStream]()
@@ -483,6 +486,7 @@ async def _connect_ws(self) -> aiohttp.ClientWebSocketResponse:
             if is_given(self._opts.speaker_labels)
             else None,
             "max_speakers": self._opts.max_speakers if is_given(self._opts.max_speakers) else None,
+            "domain": self._opts.domain if is_given(self._opts.domain) else None,
         }
 
         headers = {
Original file line number	Diff line number	Diff line change
`@@ -68,11 +68,18 @@ def __init__(`
`68`	`68`	`StreamAdapter(stt=t, vad=vad) if not t.capabilities.streaming else t for t in stt`
`69`	`69`	`]`
`70`	`70`
	`71`	`+ # Use the primary STT's aligned_transcript if all providers support it, since`
	`72`	`+ # the SDK only checks truthiness, not the specific granularity.`
	`73`	`+ aligned_transcript: Literal["word", "chunk", False] = False`
	`74`	`+ if all(t.capabilities.aligned_transcript for t in stt):`
	`75`	`+ aligned_transcript = stt[0].capabilities.aligned_transcript`
	`76`	`+`
`71`	`77`	`super().__init__(`
`72`	`78`	`capabilities=STTCapabilities(`
`73`	`79`	`streaming=True,`
`74`	`80`	`interim_results=all(t.capabilities.interim_results for t in stt),`
`75`	`81`	`diarization=all(t.capabilities.diarization for t in stt),`
	`82`	`+ aligned_transcript=aligned_transcript,`
`76`	`83`	`)`
`77`	`84`	`)`
`78`	`85`