Azure
diff --git a/sdk/ai/azure-ai-voicelive/.env.template b/sdk/ai/azure-ai-voicelive/.env.template
Lines changed: 3 additions & 3 deletions
diff --git a/sdk/ai/azure-ai-voicelive/CHANGELOG.md b/sdk/ai/azure-ai-voicelive/CHANGELOG.md
Lines changed: 33 additions & 0 deletions
diff --git a/sdk/ai/azure-ai-voicelive/README.md b/sdk/ai/azure-ai-voicelive/README.md
Lines changed: 6 additions & 6 deletions
diff --git a/sdk/ai/azure-ai-voicelive/apiview-properties.json b/sdk/ai/azure-ai-voicelive/apiview-properties.json
Lines changed: 23 additions & 19 deletions
diff --git a/sdk/ai/azure-ai-voicelive/azure/ai/voicelive/_patch.py b/sdk/ai/azure-ai-voicelive/azure/ai/voicelive/_patch.py
Lines changed: 12 additions & 7 deletions
diff --git a/sdk/ai/azure-ai-voicelive/azure/ai/voicelive/_types.py b/sdk/ai/azure-ai-voicelive/azure/ai/voicelive/_types.py
Lines changed: 1 addition & 3 deletions
diff --git a/sdk/ai/azure-ai-voicelive/azure/ai/voicelive/_utils/model_base.py b/sdk/ai/azure-ai-voicelive/azure/ai/voicelive/_utils/model_base.py
Lines changed: 1 addition & 0 deletions
diff --git a/sdk/ai/azure-ai-voicelive/azure/ai/voicelive/_version.py b/sdk/ai/azure-ai-voicelive/azure/ai/voicelive/_version.py
Lines changed: 1 addition & 1 deletion
@@ -6,9 +6,9 @@ AZURE_VOICELIVE_API_KEY=your-voicelive-api-key
 AZURE_VOICELIVE_ENDPOINT=wss://api.voicelive.com/v1
 
 # Optional configuration
-VOICELIVE_MODEL=gpt-4o-realtime-preview
-VOICELIVE_VOICE=alloy
-VOICELIVE_INSTRUCTIONS=You are a helpful assistant. Keep your responses concise.
+AZURE_VOICELIVE_MODEL=gpt-4o-realtime-preview
+AZURE_VOICELIVE_VOICE=alloy
+AZURE_VOICELIVE_INSTRUCTIONS=You are a helpful assistant. Keep your responses concise.
 
 # For audio samples
 AUDIO_FILE=path/to/your/test_audio.wav
@@ -1,5 +1,38 @@
 # Release History
 
+## 1.0.0b4 (Unreleased)
+
+### Features Added
+
+- **Personal Voice Models**: Added `PersonalVoiceModels` enum with support for `DragonLatestNeural`, `PhoenixLatestNeural`, and `PhoenixV2Neural` models
+- **Enhanced Animation Support**: Added comprehensive server event classes for animation blendshapes and viseme handling:
+  - `ServerEventResponseAnimationBlendshapeDelta` and `ServerEventResponseAnimationBlendshapeDone`
+  - `ServerEventResponseAnimationVisemeDelta` and `ServerEventResponseAnimationVisemeDone`
+- **Audio Timestamp Events**: Added `ServerEventResponseAudioTimestampDelta` and `ServerEventResponseAudioTimestampDone` for better audio timing control
+- **Improved Error Handling**: Added `ErrorResponse` class for better error management
+- **Enhanced Base Classes**: Added `ConversationItemBase` and `SessionBase` for better code organization and inheritance
+- **Token Usage Improvements**: Renamed `Usage` to `TokenUsage` for better clarity
+- **Audio Format Improvements**: Reorganized audio format enums with separate `InputAudioFormat` and `OutputAudioFormat` enums for better clarity
+- **Enhanced Output Audio Format Support**: Added more granular output audio format options including specific sampling rates (8kHz, 16kHz) for PCM16
+
+### Breaking Changes
+
+- **Model Cleanup**: Removed experimental classes `AzurePlatformVoice`, `LLMVoice`, `AzureSemanticVadServer`, `InputAudio`, `NoTurnDetection`, and `ToolChoiceFunctionObjectFunction`
+- **Class Rename**: Renamed `Usage` class to `TokenUsage` for better clarity
+- **Enum Reorganization**:
+  - Replaced `AudioFormat` enum with separate `InputAudioFormat` and `OutputAudioFormat` enums
+  - Removed `Phi4mmVoice` enum
+  - Removed `EMOTION` value from `AnimationOutputType` enum
+  - Removed `IN_PROGRESS` value from `ItemParamStatus` enum
+- **Server Events**: Removed `RESPONSE_EMOTION_HYPOTHESIS` from `ServerEventType` enum
+
+### Other Changes
+
+- **Package Structure**: Simplified package initialization with namespace package support
+- **Sample Updates**: Improved basic voice assistant samples
+- **Code Optimization**: Streamlined model definitions with significant code reduction
+- **API Configuration**: Updated API view properties for better tooling support
+
 ## 1.0.0b3 (2025-09-17)
 
 ### Features Added
 
@@ -137,7 +137,7 @@ import asyncio
 from azure.core.credentials import AzureKeyCredential
 from azure.ai.voicelive.aio import connect
 from azure.ai.voicelive.models import (
-    RequestSession, Modality, AudioFormat, ServerVad, ServerEventType
+    RequestSession, Modality, InputAudioFormat, OutputAudioFormat, ServerVad, ServerEventType
 )
 
 API_KEY = "your-api-key"
@@ -153,8 +153,8 @@ async def main():
         session = RequestSession(
             modalities=[Modality.TEXT, Modality.AUDIO],
             instructions="You are a helpful assistant.",
-            input_audio_format=AudioFormat.PCM16,
-            output_audio_format=AudioFormat.PCM16,
+            input_audio_format=InputAudioFormat.PCM16,
+            output_audio_format=OutputAudioFormat.PCM16,
             turn_detection=ServerVad(
                 threshold=0.5, 
                 prefix_padding_ms=300, 
@@ -178,7 +178,7 @@ asyncio.run(main())
 from azure.core.credentials import AzureKeyCredential
 from azure.ai.voicelive import connect
 from azure.ai.voicelive.models import (
-    RequestSession, Modality, AudioFormat, ServerVad, ServerEventType
+    RequestSession, Modality, InputAudioFormat, OutputAudioFormat, ServerVad, ServerEventType
 )
 
 API_KEY = "your-api-key"
@@ -193,8 +193,8 @@ with connect(
     session = RequestSession(
         modalities=[Modality.TEXT, Modality.AUDIO],
         instructions="You are a helpful assistant.",
-        input_audio_format=AudioFormat.PCM16,
-        output_audio_format=AudioFormat.PCM16,
+        input_audio_format=InputAudioFormat.PCM16,
+        output_audio_format=OutputAudioFormat.PCM16,
         turn_detection=ServerVad(
             threshold=0.5, 
             prefix_padding_ms=300, 
 
@@ -15,14 +15,12 @@
         "azure.ai.voicelive.models.TurnDetection": "VoiceLive.TurnDetection",
         "azure.ai.voicelive.models.AzureMultilingualSemanticVad": "VoiceLive.AzureMultilingualSemanticVad",
         "azure.ai.voicelive.models.AzurePersonalVoice": "VoiceLive.AzurePersonalVoice",
-        "azure.ai.voicelive.models.AzurePlatformVoice": "VoiceLive.AzurePlatformVoice",
         "azure.ai.voicelive.models.EOUDetection": "VoiceLive.EOUDetection",
         "azure.ai.voicelive.models.AzureSemanticDetection": "VoiceLive.AzureSemanticDetection",
         "azure.ai.voicelive.models.AzureSemanticDetectionEn": "VoiceLive.AzureSemanticDetectionEn",
         "azure.ai.voicelive.models.AzureSemanticDetectionMultilingual": "VoiceLive.AzureSemanticDetectionMultilingual",
         "azure.ai.voicelive.models.AzureSemanticVad": "VoiceLive.AzureSemanticVad",
         "azure.ai.voicelive.models.AzureSemanticVadEn": "VoiceLive.AzureSemanticVadEn",
-        "azure.ai.voicelive.models.AzureSemanticVadServer": "VoiceLive.AzureSemanticVadServer",
         "azure.ai.voicelive.models.AzureStandardVoice": "VoiceLive.AzureStandardVoice",
         "azure.ai.voicelive.models.CachedTokenDetails": "VoiceLive.CachedTokenDetails",
         "azure.ai.voicelive.models.ClientEvent": "VoiceLive.ClientEvent",
@@ -43,19 +41,18 @@
         "azure.ai.voicelive.models.ClientEventSessionAvatarConnect": "VoiceLive.ClientEventSessionAvatarConnect",
         "azure.ai.voicelive.models.ClientEventSessionUpdate": "VoiceLive.ClientEventSessionUpdate",
         "azure.ai.voicelive.models.ContentPart": "VoiceLive.ContentPart",
+        "azure.ai.voicelive.models.ConversationItemBase": "VoiceLive.ConversationItemBase",
+        "azure.ai.voicelive.models.ErrorResponse": "VoiceLive.ErrorResponse",
         "azure.ai.voicelive.models.FunctionCallItem": "VoiceLive.FunctionCallItem",
         "azure.ai.voicelive.models.FunctionCallOutputItem": "VoiceLive.FunctionCallOutputItem",
         "azure.ai.voicelive.models.Tool": "VoiceLive.Tool",
         "azure.ai.voicelive.models.FunctionTool": "VoiceLive.FunctionTool",
         "azure.ai.voicelive.models.IceServer": "VoiceLive.IceServer",
-        "azure.ai.voicelive.models.InputAudio": "VoiceLive.InputAudio",
         "azure.ai.voicelive.models.UserContentPart": "VoiceLive.UserContentPart",
         "azure.ai.voicelive.models.InputAudioContentPart": "VoiceLive.InputAudioContentPart",
         "azure.ai.voicelive.models.InputTextContentPart": "VoiceLive.InputTextContentPart",
         "azure.ai.voicelive.models.InputTokenDetails": "VoiceLive.InputTokenDetails",
-        "azure.ai.voicelive.models.LLMVoice": "VoiceLive.LLMVoice",
         "azure.ai.voicelive.models.LogProbProperties": "VoiceLive.LogProbProperties",
-        "azure.ai.voicelive.models.NoTurnDetection": "VoiceLive.NoTurnDetection",
         "azure.ai.voicelive.models.OpenAIVoice": "VoiceLive.OpenAIVoice",
         "azure.ai.voicelive.models.OutputTextContentPart": "VoiceLive.OutputTextContentPart",
         "azure.ai.voicelive.models.OutputTokenDetails": "VoiceLive.OutputTokenDetails",
@@ -89,15 +86,21 @@
         "azure.ai.voicelive.models.ServerEventInputAudioBufferCommitted": "VoiceLive.ServerEventInputAudioBufferCommitted",
         "azure.ai.voicelive.models.ServerEventInputAudioBufferSpeechStarted": "VoiceLive.ServerEventInputAudioBufferSpeechStarted",
         "azure.ai.voicelive.models.ServerEventInputAudioBufferSpeechStopped": "VoiceLive.ServerEventInputAudioBufferSpeechStopped",
+        "azure.ai.voicelive.models.ServerEventResponseAnimationBlendshapeDelta": "VoiceLive.ServerEventResponseAnimationBlendshapeDelta",
+        "azure.ai.voicelive.models.ServerEventResponseAnimationBlendshapeDone": "VoiceLive.ServerEventResponseAnimationBlendshapeDone",
+        "azure.ai.voicelive.models.ServerEventResponseAnimationVisemeDelta": "VoiceLive.ServerEventResponseAnimationVisemeDelta",
+        "azure.ai.voicelive.models.ServerEventResponseAnimationVisemeDone": "VoiceLive.ServerEventResponseAnimationVisemeDone",
         "azure.ai.voicelive.models.ServerEventResponseAudioDelta": "VoiceLive.ServerEventResponseAudioDelta",
         "azure.ai.voicelive.models.ServerEventResponseAudioDone": "VoiceLive.ServerEventResponseAudioDone",
+        "azure.ai.voicelive.models.ServerEventResponseAudioTimestampDelta": "VoiceLive.ServerEventResponseAudioTimestampDelta",
+        "azure.ai.voicelive.models.ServerEventResponseAudioTimestampDone": "VoiceLive.ServerEventResponseAudioTimestampDone",
         "azure.ai.voicelive.models.ServerEventResponseAudioTranscriptDelta": "VoiceLive.ServerEventResponseAudioTranscriptDelta",
         "azure.ai.voicelive.models.ServerEventResponseAudioTranscriptDone": "VoiceLive.ServerEventResponseAudioTranscriptDone",
         "azure.ai.voicelive.models.ServerEventResponseContentPartAdded": "VoiceLive.ServerEventResponseContentPartAdded",
         "azure.ai.voicelive.models.ServerEventResponseContentPartDone": "VoiceLive.ServerEventResponseContentPartDone",
         "azure.ai.voicelive.models.ServerEventResponseCreated": "VoiceLive.ServerEventResponseCreated",
         "azure.ai.voicelive.models.ServerEventResponseDone": "VoiceLive.ServerEventResponseDone",
-                        "azure.ai.voicelive.models.ServerEventResponseFunctionCallArgumentsDelta": "VoiceLive.ServerEventResponseFunctionCallArgumentsDelta",
+        "azure.ai.voicelive.models.ServerEventResponseFunctionCallArgumentsDelta": "VoiceLive.ServerEventResponseFunctionCallArgumentsDelta",
         "azure.ai.voicelive.models.ServerEventResponseFunctionCallArgumentsDone": "VoiceLive.ServerEventResponseFunctionCallArgumentsDone",
         "azure.ai.voicelive.models.ServerEventResponseOutputItemAdded": "VoiceLive.ServerEventResponseOutputItemAdded",
         "azure.ai.voicelive.models.ServerEventResponseOutputItemDone": "VoiceLive.ServerEventResponseOutputItemDone",
@@ -107,31 +110,32 @@
         "azure.ai.voicelive.models.ServerEventSessionCreated": "VoiceLive.ServerEventSessionCreated",
         "azure.ai.voicelive.models.ServerEventSessionUpdated": "VoiceLive.ServerEventSessionUpdated",
         "azure.ai.voicelive.models.ServerVad": "VoiceLive.ServerVad",
+        "azure.ai.voicelive.models.SessionBase": "VoiceLive.SessionBase",
         "azure.ai.voicelive.models.SystemMessageItem": "VoiceLive.SystemMessageItem",
+        "azure.ai.voicelive.models.TokenUsage": "VoiceLive.TokenUsage",
         "azure.ai.voicelive.models.ToolChoiceObject": "VoiceLive.ToolChoiceObject",
         "azure.ai.voicelive.models.ToolChoiceFunctionObject": "VoiceLive.ToolChoiceFunctionObject",
-        "azure.ai.voicelive.models.ToolChoiceFunctionObjectFunction": "VoiceLive.ToolChoiceFunctionObject.function.anonymous",
-        "azure.ai.voicelive.models.Usage": "VoiceLive.Usage",
         "azure.ai.voicelive.models.UserMessageItem": "VoiceLive.UserMessageItem",
         "azure.ai.voicelive.models.VideoCrop": "VoiceLive.VideoCrop",
         "azure.ai.voicelive.models.VideoParams": "VoiceLive.VideoParams",
         "azure.ai.voicelive.models.VideoResolution": "VoiceLive.VideoResolution",
         "azure.ai.voicelive.models.VoiceLiveErrorDetails": "VoiceLive.VoiceLiveErrorDetails",
-        "azure.ai.voicelive.models.ServerEventType": "VoiceLive.ServerEventType",
+        "azure.ai.voicelive.models.ClientEventType": "VoiceLive.ClientEventType",
         "azure.ai.voicelive.models.ItemType": "VoiceLive.ItemType",
-        "azure.ai.voicelive.models.ResponseItemStatus": "VoiceLive.ResponseItemStatus",
-        "azure.ai.voicelive.models.MessageRole": "VoiceLive.MessageRole",
-        "azure.ai.voicelive.models.ContentPartType": "VoiceLive.ContentPartType",
-        "azure.ai.voicelive.models.ResponseStatus": "VoiceLive.ResponseStatus",
-        "azure.ai.voicelive.models.OAIVoice": "VoiceLive.OAIVoice",
-        "azure.ai.voicelive.models.Phi4mmVoice": "VoiceLive.Phi4mmVoice",
-        "azure.ai.voicelive.models.AudioFormat": "VoiceLive.AudioFormat",
+        "azure.ai.voicelive.models.ItemParamStatus": "VoiceLive.ItemParamStatus",
         "azure.ai.voicelive.models.Modality": "VoiceLive.Modality",
+        "azure.ai.voicelive.models.OAIVoice": "VoiceLive.OAIVoice",
+        "azure.ai.voicelive.models.PersonalVoiceModels": "VoiceLive.PersonalVoiceModels",
+        "azure.ai.voicelive.models.OutputAudioFormat": "VoiceLive.OutputAudioFormat",
+        "azure.ai.voicelive.models.ToolType": "VoiceLive.ToolType",
         "azure.ai.voicelive.models.AnimationOutputType": "VoiceLive.AnimationOutputType",
+        "azure.ai.voicelive.models.InputAudioFormat": "VoiceLive.InputAudioFormat",
         "azure.ai.voicelive.models.AudioTimestampType": "VoiceLive.AudioTimestampType",
-        "azure.ai.voicelive.models.ToolType": "VoiceLive.ToolType",
         "azure.ai.voicelive.models.ToolChoiceLiteral": "VoiceLive.ToolChoiceLiteral",
-        "azure.ai.voicelive.models.ClientEventType": "VoiceLive.ClientEventType",
-        "azure.ai.voicelive.models.ItemParamStatus": "VoiceLive.ItemParamStatus"
+        "azure.ai.voicelive.models.ServerEventType": "VoiceLive.ServerEventType",
+        "azure.ai.voicelive.models.ResponseItemStatus": "VoiceLive.ResponseItemStatus",
+        "azure.ai.voicelive.models.MessageRole": "VoiceLive.MessageRole",
+        "azure.ai.voicelive.models.ContentPartType": "VoiceLive.ContentPartType",
+        "azure.ai.voicelive.models.ResponseStatus": "VoiceLive.ResponseStatus"
     }
 }
@@ -636,8 +636,8 @@ def __init__(
         *,
         credential: Union[AzureKeyCredential, TokenCredential],
         endpoint: str,
-        model: str,
         api_version: str,
+        model: Optional[str] = None,
         extra_query: Optional[Mapping[str, Any]] = None,
         extra_headers: Optional[Mapping[str, Any]] = None,
         connection_options: Optional[WebsocketConnectionOptions] = None,
@@ -646,8 +646,8 @@ def __init__(
         self._credential = credential
         self._endpoint = endpoint
         self.__credential_scopes = kwargs.pop("credential_scopes", "https://cognitiveservices.azure.com/.default")
-        self.__model = model
         self.__api_version = api_version
+        self.__model = model
         self.__connection: Optional[VoiceLiveConnection] = None
         self.__extra_query = extra_query
         self.__extra_headers = extra_headers
@@ -731,7 +731,9 @@ def _prepare_url(self) -> str:
         parsed = urlparse(self._endpoint)
         scheme = "wss" if parsed.scheme == "https" else ("ws" if parsed.scheme == "http" else parsed.scheme)
 
-        params: dict[str, str] = {"model": self.__model, "api-version": self.__api_version}
+        params: dict[str, Any] = {"api-version": self.__api_version}
+        if self.__model is not None:
+            params["model"] = self.__model
         extra_query: Mapping[str, Any] = self.__extra_query or {}
         for k, v in extra_query.items():
             params[str(k)] = str(v)
@@ -750,8 +752,8 @@ def connect(
     *,
     endpoint: str,
     credential: Union[AzureKeyCredential, TokenCredential],
-    model: str,
     api_version: str = "2025-05-01-preview",
+    model: Optional[str] = None,
     query: Optional[Mapping[str, Any]] = None,
     headers: Optional[Mapping[str, Any]] = None,
     connection_options: Optional[WebsocketConnectionOptions] = None,
@@ -777,10 +779,13 @@ def connect(
     :paramtype endpoint: str
     :keyword credential: Credential used to authenticate the WebSocket connection.
     :paramtype credential: ~azure.core.credentials.AzureKeyCredential or ~azure.core.credentials.TokenCredential
-    :keyword model: Model identifier to use for the session.
-    :paramtype model: str
     :keyword api_version: API version to use. Defaults to ``"2025-05-01-preview"``.
     :paramtype api_version: str
+    :keyword model: Model identifier to use for the session.
+     In most scenarios, this parameter is required.
+     It may be omitted only when connecting through an **Agent** scenario,
+     in which case the service will use the model associated with the Agent.
+    :paramtype model: str or None
     :keyword query: Optional query parameters to include in the WebSocket URL.
     :paramtype query: Mapping[str, Any] or None
     :keyword headers: Optional headers to include in the WebSocket handshake.
@@ -796,8 +801,8 @@ def connect(
     return _VoiceLiveConnectionManager(
         credential=credential,
         endpoint=endpoint,
-        model=model,
         api_version=api_version,
+        model=model,
         extra_query=query or {},
         extra_headers=headers or {},
         connection_options=connection_options,
 
@@ -10,7 +10,5 @@
 
 if TYPE_CHECKING:
     from . import models as _models
-Voice = Union[
-    str, "_models.OAIVoice", "_models.OpenAIVoice", "_models.AzureVoice", str, "_models.Phi4mmVoice", "_models.LLMVoice"
-]
+Voice = Union[str, "_models.OAIVoice", "_models.OpenAIVoice", "_models.AzureVoice"]
 ToolChoice = Union[str, "_models.ToolChoiceLiteral", "_models.ToolChoiceObject"]
@@ -1,3 +1,4 @@
+# pylint: disable=line-too-long,useless-suppression,too-many-lines
 # coding=utf-8
 # --------------------------------------------------------------------------
 # Copyright (c) Microsoft Corporation. All rights reserved.
 
@@ -6,4 +6,4 @@
 # Changes may cause incorrect behavior and will be lost if the code is regenerated.
 # --------------------------------------------------------------------------
 
-VERSION = "1.0.0b3"
+VERSION = "1.0.0b4"
Original file line number	Diff line number	Diff line change
`@@ -1,3 +1,4 @@`
	`1`	`+# pylint: disable=line-too-long,useless-suppression,too-many-lines`
`1`	`2`	`# coding=utf-8`
`2`	`3`	`# --------------------------------------------------------------------------`
`3`	`4`	`# Copyright (c) Microsoft Corporation. All rights reserved.`