Skip to content

Commit c4973c3

Browse files
xitzhang, Xiting Zhang, and Copilot
authored
[VoiceLive] Add LLMVoice and AzurePlatformVoice support, update endpoint to voice-live (#42997)
* [VoiceLive] Add async function-calling agent sample
* add phrase list
* fix typo
* Update sdk/ai/azure-ai-voicelive/samples/async_function_calling_sample.py Co-authored-by: Copilot <[email protected]>
* Update sdk/ai/azure-ai-voicelive/samples/async_function_calling_sample.py Co-authored-by: Copilot <[email protected]>
* update
* fix typo
* update changelog
* update
* remove breaking change section
* update changelog
* fix change log
* revert changelog I lost
* update version and change log
* enable type verification
* update
* update models
* fix pylint issue
---------
Co-authored-by: Xiting Zhang <[email protected]>
Co-authored-by: Copilot <[email protected]>
1 parent 6d11ecc commit c4973c3

File tree

13 files changed

+821
-490
lines changed

13 files changed

+821
-490
lines changed

sdk/ai/azure-ai-voicelive/CHANGELOG.md

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,11 +4,20 @@
44

55
### Features Added
66

7-
- Phrase list
7+
- **Transcription improvement**: Added phrase list
8+
- **New Voice Types**: Added `AzurePlatformVoice` and `LLMVoice` classes
9+
- **Enhanced Speech Detection**: Added `AzureSemanticVadServer` class
10+
- **Improved Function Calling**: Enhanced async function calling sample with better error handling
811

912
### Breaking Changes
1013

11-
- Removed `custom_model` and `enabled` from `AudioInputTranscriptionSettings`.
14+
- **Transcription**: Removed `custom_model` and `enabled` from `AudioInputTranscriptionSettings`.
15+
- **Async Authentication**: Fixed credential handling for async scenarios
16+
- **Model Serialization**: Improved error handling and deserialization
17+
18+
### Other Changes
19+
20+
- **Code Modernization**: Updated type annotations throughout
1221

1322
## 1.0.0b2 (2025-09-10)
1423

sdk/ai/azure-ai-voicelive/apiview-properties.json

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,10 +15,12 @@
1515
"azure.ai.voicelive.models.TurnDetection": "VoiceLive.TurnDetection",
1616
"azure.ai.voicelive.models.AzureMultilingualSemanticVad": "VoiceLive.AzureMultilingualSemanticVad",
1717
"azure.ai.voicelive.models.AzurePersonalVoice": "VoiceLive.AzurePersonalVoice",
18+
"azure.ai.voicelive.models.AzurePlatformVoice": "VoiceLive.AzurePlatformVoice",
1819
"azure.ai.voicelive.models.EOUDetection": "VoiceLive.EOUDetection",
1920
"azure.ai.voicelive.models.AzureSemanticDetection": "VoiceLive.AzureSemanticDetection",
2021
"azure.ai.voicelive.models.AzureSemanticDetectionMultilingual": "VoiceLive.AzureSemanticDetectionMultilingual",
2122
"azure.ai.voicelive.models.AzureSemanticVad": "VoiceLive.AzureSemanticVad",
23+
"azure.ai.voicelive.models.AzureSemanticVadServer": "VoiceLive.AzureSemanticVadServer",
2224
"azure.ai.voicelive.models.AzureStandardVoice": "VoiceLive.AzureStandardVoice",
2325
"azure.ai.voicelive.models.ClientEvent": "VoiceLive.ClientEvent",
2426
"azure.ai.voicelive.models.ClientEventConversationItemCreate": "VoiceLive.ClientEventConversationItemCreate",
@@ -48,6 +50,7 @@
4850
"azure.ai.voicelive.models.InputAudioContentPart": "VoiceLive.InputAudioContentPart",
4951
"azure.ai.voicelive.models.InputTextContentPart": "VoiceLive.InputTextContentPart",
5052
"azure.ai.voicelive.models.InputTokenDetails": "VoiceLive.InputTokenDetails",
53+
"azure.ai.voicelive.models.LLMVoice": "VoiceLive.LLMVoice",
5154
"azure.ai.voicelive.models.LogProbProperties": "VoiceLive.LogProbProperties",
5255
"azure.ai.voicelive.models.NoTurnDetection": "VoiceLive.NoTurnDetection",
5356
"azure.ai.voicelive.models.OpenAIVoice": "VoiceLive.OpenAIVoice",
@@ -91,7 +94,7 @@
9194
"azure.ai.voicelive.models.ServerEventResponseContentPartDone": "VoiceLive.ServerEventResponseContentPartDone",
9295
"azure.ai.voicelive.models.ServerEventResponseCreated": "VoiceLive.ServerEventResponseCreated",
9396
"azure.ai.voicelive.models.ServerEventResponseDone": "VoiceLive.ServerEventResponseDone",
94-
"azure.ai.voicelive.models.ServerEventResponseFunctionCallArgumentsDelta": "VoiceLive.ServerEventResponseFunctionCallArgumentsDelta",
97+
"azure.ai.voicelive.models.ServerEventResponseFunctionCallArgumentsDelta": "VoiceLive.ServerEventResponseFunctionCallArgumentsDelta",
9598
"azure.ai.voicelive.models.ServerEventResponseFunctionCallArgumentsDone": "VoiceLive.ServerEventResponseFunctionCallArgumentsDone",
9699
"azure.ai.voicelive.models.ServerEventResponseOutputItemAdded": "VoiceLive.ServerEventResponseOutputItemAdded",
97100
"azure.ai.voicelive.models.ServerEventResponseOutputItemDone": "VoiceLive.ServerEventResponseOutputItemDone",

sdk/ai/azure-ai-voicelive/azure/ai/voicelive/_patch.py

Lines changed: 12 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -17,9 +17,7 @@
1717
from typing import (
1818
TYPE_CHECKING,
1919
Any,
20-
Dict,
2120
Iterator,
22-
List,
2321
Mapping,
2422
Optional,
2523
Sequence,
@@ -57,9 +55,11 @@
5755
try:
5856
from websockets.typing import Subprotocol as WSSubprotocol # runtime if available
5957
except Exception:
58+
6059
class WSSubprotocol(str): # fallback, keeps runtime simple
6160
pass
6261

62+
6363
if TYPE_CHECKING:
6464
# Not imported at runtime; only for type checkers (mypy/pyright).
6565
from websockets.sync.client import ClientConnection as _WSClientConnection
@@ -69,7 +69,7 @@ class WSSubprotocol(str): # fallback, keeps runtime simple
6969
else:
7070
from typing_extensions import NotRequired # noqa: F401
7171

72-
__all__: List[str] = [
72+
__all__: list[str] = [
7373
"connect",
7474
"WebsocketConnectionOptions",
7575
"ConnectionError",
@@ -339,7 +339,7 @@ def clear(self, *, event_id: Optional[str] = None) -> None:
339339
:return: None
340340
:rtype: None
341341
"""
342-
event: Dict[str, Any] = {"type": "output_audio_buffer.clear"}
342+
event: dict[str, Any] = {"type": "output_audio_buffer.clear"}
343343
if event_id:
344344
event["event_id"] = event_id
345345
self._connection.send(event)
@@ -357,11 +357,7 @@ def __init__(self, connection: "VoiceLiveConnection") -> None:
357357
self._connection = connection
358358

359359
def create(
360-
self,
361-
*,
362-
item: Mapping[str, Any],
363-
previous_item_id: Optional[str] = None,
364-
event_id: Optional[str] = None
360+
self, *, item: Mapping[str, Any], previous_item_id: Optional[str] = None, event_id: Optional[str] = None
365361
) -> None:
366362
"""Create a new conversation item.
367363
@@ -477,7 +473,7 @@ def update(self, *, session: Mapping[str, Any], event_id: Optional[str] = None)
477473
:return: None
478474
:rtype: None
479475
"""
480-
event: Dict[str, Any] = {"type": "transcription_session.update", "session": dict(session)}
476+
event: dict[str, Any] = {"type": "transcription_session.update", "session": dict(session)}
481477
if event_id:
482478
event["event_id"] = event_id
483479
self._connection.send(event)
@@ -674,14 +670,14 @@ def __enter__(self) -> VoiceLiveConnection:
674670

675671
# Build headers as str->str and use list of tuples to satisfy HeadersLike
676672
extra_headers_map: Mapping[str, Any] = self.__extra_headers or {}
677-
merged_headers: Dict[str, str] = {
673+
merged_headers: dict[str, str] = {
678674
**self._get_auth_headers(),
679675
**{str(k): str(v) for k, v in extra_headers_map.items()},
680676
}
681-
headers_like: List[Tuple[str, str]] = list(merged_headers.items())
677+
headers_like: list[Tuple[str, str]] = list(merged_headers.items())
682678

683679
# Build kwargs for websockets; avoid dict(Optional[...])
684-
ws_kwargs: Dict[str, Any] = {}
680+
ws_kwargs: dict[str, Any] = {}
685681
if self.__connection_options is not None:
686682
ws_kwargs.update(cast(Mapping[str, Any], self.__connection_options))
687683

@@ -715,7 +711,7 @@ def __exit__(self, exc_type, exc, exc_tb) -> None:
715711
if self.__connection is not None:
716712
self.__connection.close()
717713

718-
def _get_auth_headers(self) -> Dict[str, str]:
714+
def _get_auth_headers(self) -> dict[str, str]:
719715
"""Get authentication headers for WebSocket connection.
720716
721717
:return: A dictionary containing authentication headers.
@@ -735,7 +731,7 @@ def _prepare_url(self) -> str:
735731
parsed = urlparse(self._endpoint)
736732
scheme = "wss" if parsed.scheme == "https" else ("ws" if parsed.scheme == "http" else parsed.scheme)
737733

738-
params: Dict[str, str] = {"model": self.__model, "api-version": self.__api_version}
734+
params: dict[str, str] = {"model": self.__model, "api-version": self.__api_version}
739735
extra_query: Mapping[str, Any] = self.__extra_query or {}
740736
for k, v in extra_query.items():
741737
params[str(k)] = str(v)
@@ -746,7 +742,7 @@ def _prepare_url(self) -> str:
746742
if key not in params:
747743
params[key] = value_list[0] if value_list else ""
748744

749-
path = parsed.path.rstrip("/") + "/voice-agent/realtime"
745+
path = parsed.path.rstrip("/") + "/voice-live/realtime"
750746
return urlunparse((scheme, parsed.netloc, path, parsed.params, urlencode(params), parsed.fragment))
751747

752748

sdk/ai/azure-ai-voicelive/azure/ai/voicelive/_types.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,5 +10,7 @@
1010

1111
if TYPE_CHECKING:
1212
from . import models as _models
13-
Voice = Union[str, "_models.OAIVoice", "_models.OpenAIVoice", "_models.AzureVoice", "_models.Phi4mmVoice"]
13+
Voice = Union[
14+
str, "_models.OAIVoice", "_models.OpenAIVoice", "_models.AzureVoice", str, "_models.Phi4mmVoice", "_models.LLMVoice"
15+
]
1416
ToolChoice = Union[str, "_models.ToolChoiceLiteral", "_models.ToolChoiceObject"]

0 commit comments

Comments
 (0)