[VoiceLive] Add async function-calling agent sample (#42978)

xitzhang · Xiting Zhang · Copilot · web-flow · commit 46d8caa2f4d6 · 2025-09-12T17:38:37.000-07:00
* [VoiceLive] Add async function-calling agent sample

* add phrase list

* fix typo

* Update sdk/ai/azure-ai-voicelive/samples/async_function_calling_sample.py

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>

* Update sdk/ai/azure-ai-voicelive/samples/async_function_calling_sample.py

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>

* update

* fix typo

* update changelog

* update

* remove breaking change section

* update changelog

* fix change log

* revert changelog I lost

---------

Co-authored-by: Xiting Zhang <xitzhang@microsoft.com>
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
diff --git a/sdk/ai/azure-ai-voicelive/.env.template b/sdk/ai/azure-ai-voicelive/.env.template
@@ -2,7 +2,7 @@
 # Copy this file to .env and fill in your values
 
 # Required credentials
-AZURE_VOICELIVE_KEY=your-voicelive-api-key
+AZURE_VOICELIVE_API_KEY=your-voicelive-api-key
 AZURE_VOICELIVE_ENDPOINT=wss://api.voicelive.com/v1
 
 # Optional configuration
diff --git a/sdk/ai/azure-ai-voicelive/CHANGELOG.md b/sdk/ai/azure-ai-voicelive/CHANGELOG.md
@@ -1,12 +1,23 @@
 # Release History
 
+## 1.0.0b3 (Unreleased)
+
+### Features Added
+
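+- Added `phrase_list` (phrase hints to bias recognition) and `custom_speech` to `AudioInputTranscriptionSettings`.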
+
+### Breaking Changes
+
+- Removed `custom_model` and `enabled` from `AudioInputTranscriptionSettings`, and removed `s2s-ingraph` from its accepted `model` values.
+
 ## 1.0.0b2 (2025-09-10)
 
 ### Features Added
 
 - Async function call
 
 ### Bugs Fixed
+
 - Fixed function calling: ensure `FunctionCallOutputItem.output` is properly serialized as a JSON string before sending to the service.
 
 ## 1.0.0b1 (2025-08-28)
diff --git a/sdk/ai/azure-ai-voicelive/azure/ai/voicelive/_version.py b/sdk/ai/azure-ai-voicelive/azure/ai/voicelive/_version.py
@@ -6,4 +6,4 @@
 # Changes may cause incorrect behavior and will be lost if the code is regenerated.
 # --------------------------------------------------------------------------
 
-VERSION = "1.0.0b2"
+VERSION = "1.0.0b3"
diff --git a/sdk/ai/azure-ai-voicelive/azure/ai/voicelive/models/_models.py b/sdk/ai/azure-ai-voicelive/azure/ai/voicelive/models/_models.py
@@ -252,47 +252,53 @@ def __init__(self, *args: Any, **kwargs: Any) -> None:
 class AudioInputTranscriptionSettings(_Model):
     """Configuration for input audio transcription.
 
-    :ivar model: The model used for transcription. E.g., 'whisper-1', 'azure-fast-transcription',
-     's2s-ingraph'. Required. Is one of the following types: Literal["whisper-1"],
-     Literal["azure-fast-transcription"], Literal["s2s-ingraph"]
+    :ivar model: The transcription model to use. Supported values:
+     "whisper-1", "gpt-4o-transcribe", "gpt-4o-mini-transcribe",
+     "azure-fast-transcription", "azure-speech". Required.
     :vartype model: str
-    :ivar language: The language code to use for transcription, if specified.
-    :vartype language: str
-    :ivar enabled: Whether transcription is enabled. Required.
-    :vartype enabled: bool
-    :ivar custom_model: Whether a custom model is being used. Required.
-    :vartype custom_model: bool
+    :ivar language: Optional BCP-47 language code (e.g., "en-US").
+    :vartype language: str | None
+    :ivar custom_speech: Optional configuration for custom speech models.
+    :vartype custom_speech: dict[str, str] | None
+    :ivar phrase_list: Optional list of phrase hints to bias recognition.
+    :vartype phrase_list: list[str] | None
     """
 
-    model: Literal["whisper-1", "azure-fast-transcription", "s2s-ingraph"] = rest_field(
-        visibility=["read", "create", "update", "delete", "query"]
-    )
-    """The model used for transcription. E.g., 'whisper-1', 'azure-fast-transcription', 's2s-ingraph'.
-     Required. Is one of the following types: Literal[\"whisper-1\"],
-     Literal[\"azure-fast-transcription\"], Literal[\"s2s-ingraph\"]"""
+    model: Literal[
+        "whisper-1",
+        "gpt-4o-transcribe",
+        "gpt-4o-mini-transcribe",
+        "azure-fast-transcription",
+        "azure-speech",
+    ] = rest_field(visibility=["read", "create", "update", "delete", "query"])
+    """Required transcription model."""
+
     language: Optional[str] = rest_field(visibility=["read", "create", "update", "delete", "query"])
-    """The language code to use for transcription, if specified."""
-    enabled: bool = rest_field(visibility=["read", "create", "update", "delete", "query"])
-    """Whether transcription is enabled. Required."""
-    custom_model: bool = rest_field(visibility=["read", "create", "update", "delete", "query"])
-    """Whether a custom model is being used. Required."""
+    """Optional language code (e.g., 'en-US')."""
+
+    custom_speech: Optional[Dict[str, str]] = rest_field(visibility=["read", "create", "update", "delete", "query"])
+    """Optional custom speech configuration."""
+
+    phrase_list: Optional[List[str]] = rest_field(visibility=["read", "create", "update", "delete", "query"])
+    """Optional phrase hints."""
 
     @overload
     def __init__(
         self,
         *,
-        model: Literal["whisper-1", "azure-fast-transcription", "s2s-ingraph"],
-        enabled: bool,
-        custom_model: bool,
-        language: Optional[str] = None,
+        model: Literal[
+            "whisper-1",
+            "gpt-4o-transcribe",
+            "gpt-4o-mini-transcribe",
+            "azure-fast-transcription",
+            "azure-speech",
+        ],
+        language: Optional[str] = ...,
+        custom_speech: Optional[Dict[str, str]] = ...,
+        phrase_list: Optional[List[str]] = ...,
     ) -> None: ...
-
     @overload
-    def __init__(self, mapping: Mapping[str, Any]) -> None:
-        """
-        :param mapping: raw JSON to initialize the model.
-        :type mapping: Mapping[str, Any]
-        """
+    def __init__(self, mapping: Mapping[str, Any]) -> None: ...
 
     def __init__(self, *args: Any, **kwargs: Any) -> None:
         super().__init__(*args, **kwargs)
@@ -3317,7 +3323,7 @@ def __init__(self, *args: Any, **kwargs: Any) -> None:
         super().__init__(*args, **kwargs)
 
     @classmethod
-    def deserialize(cls, payload: dict[str, Any]) -> "ServerEvent":
+    def deserialize(cls, payload: Dict[str, Any]) -> "ServerEvent":
         # public, linter-friendly entrypoint
         # pylint: disable-next=protected-access
         return cls._deserialize(payload, [])
diff --git a/sdk/ai/azure-ai-voicelive/pyproject.toml b/sdk/ai/azure-ai-voicelive/pyproject.toml
@@ -1,42 +1,55 @@
+# --------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License. See License.txt in the project root for license information.
+# Code generated by Microsoft (R) Python Code Generator.
+# Changes may cause incorrect behavior and will be lost if the code is regenerated.
+# --------------------------------------------------------------------------
+
 [build-system]
 requires = ["setuptools>=77.0.3", "wheel"]
 build-backend = "setuptools.build_meta"
 
 [project]
 name = "azure-ai-voicelive"
-authors = [{ name = "Microsoft Corporation", email = "azpysdkhelp@microsoft.com" }]
-description = "Microsoft Corporation Azure AI VoiceLive Client Library for Python"
+authors = [
+  { name = "Microsoft Corporation", email = "azpysdkhelp@microsoft.com" },
+]
+description = "Microsoft Corporation Azure AI VoiceLive Client Library for Python"
 license = "MIT"
 classifiers = [
-  "Development Status :: 4 - Beta",
-  "Programming Language :: Python",
-  "Programming Language :: Python :: 3 :: Only",
-  "Programming Language :: Python :: 3",
-  "Programming Language :: Python :: 3.9",
-  "Programming Language :: Python :: 3.10",
-  "Programming Language :: Python :: 3.11",
-  "Programming Language :: Python :: 3.12",
-  "Programming Language :: Python :: 3.13",
+    "Development Status :: 4 - Beta",
+    "Programming Language :: Python",
+    "Programming Language :: Python :: 3 :: Only",
+    "Programming Language :: Python :: 3",
+    "Programming Language :: Python :: 3.9",
+    "Programming Language :: Python :: 3.10",
+    "Programming Language :: Python :: 3.11",
+    "Programming Language :: Python :: 3.12",
+    "Programming Language :: Python :: 3.13",
 ]
 requires-python = ">=3.9"
-keywords = ["azure", "azure sdk", "voice", "voicelive", "realtime", "websocket", "audio"]
+keywords = ["azure", "azure sdk"]
 
 dependencies = [
-  "isodate>=0.6.1",
-  "azure-core>=1.35.0",
-  "typing-extensions>=4.6.0",
+    "isodate>=0.6.1",
+    "azure-core>=1.35.0",
+    "typing-extensions>=4.6.0",
+]
+dynamic = [
+"version", "readme"
 ]
-
-dynamic = ["version", "readme"]
 
 [project.optional-dependencies]
-aiohttp = ["aiohttp>=3.9.0,<4.0.0"]
-websockets = ["websockets>=12.0,<14.0"]
+aiohttp = [
+  "aiohttp>=3.9.0,<4.0.0",
+]
+websockets = [
+  "websockets>=12.0,<14.0",
+]
 all-websockets = [
   "aiohttp>=3.9.0,<4.0.0",
   "websockets>=12.0,<14.0",
 ]
-
 test = [
   "pytest>=8.0",
   "pytest-asyncio>=0.23",
@@ -50,17 +63,14 @@ test = [
 ]
 
 [project.urls]
-Repository = "https://github.com/Azure/azure-sdk-for-python"
+repository = "https://github.com/Azure/azure-sdk-for-python"
 
 [tool.setuptools.dynamic]
-version = { attr = "azure.ai.voicelive._version.VERSION" }
-readme = { file = ["README.md", "CHANGELOG.md"], content-type = "text/markdown" }
+version = {attr = "azure.ai.voicelive._version.VERSION"}
+readme = {file = ["README.md", "CHANGELOG.md"], content-type = "text/markdown"}
 
 [tool.setuptools.packages.find]
 include = ["azure.ai.voicelive", "azure.ai.voicelive.*"]
 
 [tool.setuptools.package-data]
 "azure.ai.voicelive" = ["py.typed"]
-
-[tool.azure-sdk-build]
-verifytypes = false
diff --git a/sdk/ai/azure-ai-voicelive/samples/BASIC_VOICE_ASSISTANT.md b/sdk/ai/azure-ai-voicelive/samples/BASIC_VOICE_ASSISTANT.md
@@ -34,7 +34,7 @@ This sample demonstrates the fundamental capabilities of the Azure VoiceLive SDK
    
    Or set environment variables directly:
    ```bash
-   export AZURE_VOICELIVE_KEY="your-api-key"
+   export AZURE_VOICELIVE_API_KEY="your-api-key"
    export AZURE_VOICELIVE_ENDPOINT="wss://api.voicelive.com/v1"
    export VOICELIVE_MODEL="gpt-4o-realtime-preview"
    export VOICELIVE_VOICE="en-US-AvaNeural"
@@ -63,7 +63,7 @@ python basic_voice_assistant.py \
 
 ### Available Options
 
-- `--api-key`: Azure VoiceLive API key (or use AZURE_VOICELIVE_KEY env var)
+- `--api-key`: Azure VoiceLive API key (or use AZURE_VOICELIVE_API_KEY env var)
 - `--endpoint`: VoiceLive endpoint URL
 - `--model`: Model to use (default: gpt-4o-realtime-preview)
 - `--voice`: Voice for the assistant (alloy, echo, fable, onyx, nova, shimmer, en-US-AvaNeural, etc.)
diff --git a/sdk/ai/azure-ai-voicelive/samples/README.md b/sdk/ai/azure-ai-voicelive/samples/README.md
@@ -21,7 +21,7 @@ This directory contains sample applications demonstrating various capabilities o
    Create a `.env` file at the root of the azure-ai-voicelive directory or in the samples directory with the following variables:
 
    ```ini
-   AZURE_VOICELIVE_KEY=your-voicelive-api-key
+   AZURE_VOICELIVE_API_KEY=your-voicelive-api-key
    AZURE_VOICELIVE_ENDPOINT=wss://api.voicelive.com/v1
    VOICELIVE_MODEL=gpt-4o-realtime-preview
    VOICELIVE_VOICE=alloy
@@ -107,7 +107,7 @@ python sample_voicelive_async.py --help
   - Confirm your network allows WSS to the service
 
 - **Auth errors**
-  - For API key: confirm `AZURE_VOICELIVE_KEY`
+  - For API key: confirm `AZURE_VOICELIVE_API_KEY`
   - For AAD: ensure your identity has access to the resource
 
 ## Next steps
diff --git a/sdk/ai/azure-ai-voicelive/samples/async_function_calling_sample.py b/sdk/ai/azure-ai-voicelive/samples/async_function_calling_sample.py
diff --git a/sdk/ai/azure-ai-voicelive/samples/basic_voice_assistant.py b/sdk/ai/azure-ai-voicelive/samples/basic_voice_assistant.py
diff --git a/sdk/ai/azure-ai-voicelive/samples/basic_voice_assistant_async.py b/sdk/ai/azure-ai-voicelive/samples/basic_voice_assistant_async.py