Skip to content

Commit fa49242

Browse files
authored
VoiceLive SDK updates (Azure#52736)
* Timespan changes * More TimeSpan * WIP * Clean up turn detection * Latest update * Generator updates * Changelog updates * Latest updates
1 parent 5ed63e2 commit fa49242

File tree

158 files changed

+2784
-5493
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

158 files changed

+2784
-5493
lines changed

sdk/ai/Azure.AI.VoiceLive/CHANGELOG.md

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,15 @@
33
## 1.0.0-beta.2 (Unreleased)
44

55
### Features Added
6+
Added overloads for MessageItem creation to accept a single content part.
67

78
### Breaking Changes
9+
AudioFormat was split into InputAudioFormat and OutputAudioFormat.
10+
Emotion classes and options were dropped.
11+
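Eou and TurnDetection classes were renamed (for example, `ServerVad` is now `ServerVadTurnDetection` and `AzureSemanticVad` is now `AzureSemanticVadTurnDetection`).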
12+
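API properties that were duration-based are now `TimeSpan` values (for example, `PrefixPaddingMs = 300` becomes `PrefixPadding = TimeSpan.FromMilliseconds(300)`).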
13+
Methods to configure the session were collapsed into ConfigureSession (for example, `ConfigureConversationSessionAsync` is now `ConfigureSessionAsync`).
14+
Renamed ToolChoiceFunctionObjectFunction to ToolChoiceFunctionObject.
815

916
### Bugs Fixed
1017

sdk/ai/Azure.AI.VoiceLive/README.md

Lines changed: 13 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -151,22 +151,22 @@ VoiceLiveSessionOptions sessionOptions = new()
151151
Model = model,
152152
Instructions = "You are a helpful AI assistant. Respond naturally and conversationally.",
153153
Voice = new AzureStandardVoice("en-US-AvaNeural"),
154-
TurnDetection = new ServerVad()
154+
TurnDetection = new AzureSemanticVadTurnDetection()
155155
{
156156
Threshold = 0.5f,
157-
PrefixPaddingMs = 300,
158-
SilenceDurationMs = 500
157+
PrefixPadding = TimeSpan.FromMilliseconds(300),
158+
SilenceDuration = TimeSpan.FromMilliseconds(500)
159159
},
160-
InputAudioFormat = AudioFormat.Pcm16,
161-
OutputAudioFormat = AudioFormat.Pcm16
160+
InputAudioFormat = InputAudioFormat.Pcm16,
161+
OutputAudioFormat = OutputAudioFormat.Pcm16
162162
};
163163

164164
// Ensure modalities include audio
165165
sessionOptions.Modalities.Clear();
166166
sessionOptions.Modalities.Add(InputModality.Text);
167167
sessionOptions.Modalities.Add(InputModality.Audio);
168168

169-
await session.ConfigureConversationSessionAsync(sessionOptions).ConfigureAwait(false);
169+
await session.ConfigureSessionAsync(sessionOptions).ConfigureAwait(false);
170170

171171
// Process events from the session
172172
await foreach (SessionUpdate serverEvent in session.GetUpdatesAsync().ConfigureAwait(false))
@@ -196,22 +196,20 @@ VoiceLiveSessionOptions sessionOptions = new()
196196
{
197197
Temperature = 0.8f
198198
},
199-
TurnDetection = new AzureSemanticVad()
199+
TurnDetection = new AzureSemanticVadTurnDetection()
200200
{
201-
NegThreshold = 0.3f,
202-
WindowSize = 300,
203201
RemoveFillerWords = true
204202
},
205-
InputAudioFormat = AudioFormat.Pcm16,
206-
OutputAudioFormat = AudioFormat.Pcm16
203+
InputAudioFormat = InputAudioFormat.Pcm16,
204+
OutputAudioFormat = OutputAudioFormat.Pcm16
207205
};
208206

209207
// Ensure modalities include audio
210208
sessionOptions.Modalities.Clear();
211209
sessionOptions.Modalities.Add(InputModality.Text);
212210
sessionOptions.Modalities.Add(InputModality.Audio);
213211

214-
await session.ConfigureConversationSessionAsync(sessionOptions).ConfigureAwait(false);
212+
await session.ConfigureSessionAsync(sessionOptions).ConfigureAwait(false);
215213
```
216214

217215
### Function calling example
@@ -240,8 +238,8 @@ VoiceLiveSessionOptions sessionOptions = new()
240238
Model = model,
241239
Instructions = "You are a weather assistant. Use the get_current_weather function to help users with weather information.",
242240
Voice = new AzureStandardVoice("en-US-AvaNeural"),
243-
InputAudioFormat = AudioFormat.Pcm16,
244-
OutputAudioFormat = AudioFormat.Pcm16
241+
InputAudioFormat = InputAudioFormat.Pcm16,
242+
OutputAudioFormat = OutputAudioFormat.Pcm16
245243
};
246244

247245
// Add the function tool
@@ -252,7 +250,7 @@ sessionOptions.Modalities.Clear();
252250
sessionOptions.Modalities.Add(InputModality.Text);
253251
sessionOptions.Modalities.Add(InputModality.Audio);
254252

255-
await session.ConfigureConversationSessionAsync(sessionOptions).ConfigureAwait(false);
253+
await session.ConfigureSessionAsync(sessionOptions).ConfigureAwait(false);
256254
```
257255

258256
## Troubleshooting

sdk/ai/Azure.AI.VoiceLive/api/Azure.AI.VoiceLive.net8.0.cs

Lines changed: 218 additions & 329 deletions
Large diffs are not rendered by default.

sdk/ai/Azure.AI.VoiceLive/api/Azure.AI.VoiceLive.netstandard2.0.cs

Lines changed: 218 additions & 328 deletions
Large diffs are not rendered by default.

sdk/ai/Azure.AI.VoiceLive/samples/BasicVoiceAssistant/BasicVoiceAssistant.cs

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ namespace Azure.AI.VoiceLive.Samples;
1212
/// This sample now demonstrates some of the new convenience methods added to the VoiceLive SDK:
1313
/// - ClearStreamingAudioAsync() - Clears all input audio currently being streamed
1414
/// - CancelResponseAsync() - Cancels the current response generation (existing method)
15-
/// - ConfigureConversationSessionAsync() - Configures session options (existing method)
15+
/// - ConfigureSessionAsync() - Configures session options (existing method)
1616
///
1717
/// Additional convenience methods available but not shown in this sample:
1818
/// - StartAudioTurnAsync() / EndAudioTurnAsync() / CancelAudioTurnAsync() - Audio turn management
@@ -123,11 +123,11 @@ private async Task SetupSessionAsync(CancellationToken cancellationToken)
123123
var azureVoice = new AzureStandardVoice(_voice);
124124

125125
// Create strongly typed turn detection configuration
126-
var turnDetectionConfig = new ServerVad
126+
var turnDetectionConfig = new ServerVadTurnDetection
127127
{
128128
Threshold = 0.5f,
129-
PrefixPaddingMs = 300,
130-
SilenceDurationMs = 500
129+
PrefixPadding = TimeSpan.FromMilliseconds(300),
130+
SilenceDuration = TimeSpan.FromMilliseconds(500)
131131
};
132132

133133
// Create conversation session options
@@ -137,8 +137,8 @@ private async Task SetupSessionAsync(CancellationToken cancellationToken)
137137
Model = _model,
138138
Instructions = _instructions,
139139
Voice = azureVoice,
140-
InputAudioFormat = AudioFormat.Pcm16,
141-
OutputAudioFormat = AudioFormat.Pcm16,
140+
InputAudioFormat = InputAudioFormat.Pcm16,
141+
OutputAudioFormat = OutputAudioFormat.Pcm16,
142142
TurnDetection = turnDetectionConfig
143143
};
144144

@@ -147,7 +147,7 @@ private async Task SetupSessionAsync(CancellationToken cancellationToken)
147147
sessionOptions.Modalities.Add(InputModality.Text);
148148
sessionOptions.Modalities.Add(InputModality.Audio);
149149

150-
await _session!.ConfigureConversationSessionAsync(sessionOptions, cancellationToken).ConfigureAwait(false);
150+
await _session!.ConfigureSessionAsync(sessionOptions, cancellationToken).ConfigureAwait(false);
151151

152152
_logger.LogInformation("Session configuration sent");
153153
}

sdk/ai/Azure.AI.VoiceLive/samples/BasicVoiceAssistant/README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ This sample now demonstrates some of the new convenience methods added to the Vo
88

99
**Used in this sample:**
1010
- `ClearStreamingAudioAsync()` - Clears all input audio currently being streamed
11-
- `ConfigureConversationSessionAsync()` - Configures conversation session options
11+
- `ConfigureSessionAsync()` - Configures conversation session options
1212
- `CancelResponseAsync()` - Cancels the current response generation
1313
- `SendInputAudioAsync()` - Sends audio data to the service
1414

sdk/ai/Azure.AI.VoiceLive/samples/CustomerServiceBot/CustomerServiceBot.cs

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -126,11 +126,11 @@ private async Task SetupSessionAsync(CancellationToken cancellationToken)
126126
var azureVoice = new AzureStandardVoice(_voice);
127127

128128
// Create strongly typed turn detection configuration
129-
var turnDetectionConfig = new ServerVad
129+
var turnDetectionConfig = new ServerVadTurnDetection
130130
{
131131
Threshold = 0.5f,
132-
PrefixPaddingMs = 300,
133-
SilenceDurationMs = 500
132+
PrefixPadding = TimeSpan.FromMilliseconds(300),
133+
SilenceDuration = TimeSpan.FromMilliseconds(500)
134134
};
135135

136136
// Create conversation session options with function tools
@@ -139,8 +139,8 @@ private async Task SetupSessionAsync(CancellationToken cancellationToken)
139139
Model = _model,
140140
Instructions = _instructions,
141141
Voice = azureVoice,
142-
InputAudioFormat = AudioFormat.Pcm16,
143-
OutputAudioFormat = AudioFormat.Pcm16,
142+
InputAudioFormat = InputAudioFormat.Pcm16,
143+
OutputAudioFormat = OutputAudioFormat.Pcm16,
144144
TurnDetection = turnDetectionConfig
145145
};
146146

@@ -157,7 +157,7 @@ private async Task SetupSessionAsync(CancellationToken cancellationToken)
157157
sessionOptions.Tools.Add(CreateUpdateShippingAddressTool());
158158

159159

160-
await _session!.ConfigureConversationSessionAsync(sessionOptions, cancellationToken).ConfigureAwait(false);
160+
await _session!.ConfigureSessionAsync(sessionOptions, cancellationToken).ConfigureAwait(false);
161161

162162
_logger.LogInformation("Session configuration sent with {ToolCount} customer service tools", sessionOptions.Tools.Count);
163163
}

sdk/ai/Azure.AI.VoiceLive/samples/snippets/BasicUsageSnippets.cs

Lines changed: 13 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -35,22 +35,22 @@ public async Task BasicVoiceAssistantExample()
3535
Model = model,
3636
Instructions = "You are a helpful AI assistant. Respond naturally and conversationally.",
3737
Voice = new AzureStandardVoice("en-US-AvaNeural"),
38-
TurnDetection = new ServerVad()
38+
TurnDetection = new AzureSemanticVadTurnDetection()
3939
{
4040
Threshold = 0.5f,
41-
PrefixPaddingMs = 300,
42-
SilenceDurationMs = 500
41+
PrefixPadding = TimeSpan.FromMilliseconds(300),
42+
SilenceDuration = TimeSpan.FromMilliseconds(500)
4343
},
44-
InputAudioFormat = AudioFormat.Pcm16,
45-
OutputAudioFormat = AudioFormat.Pcm16
44+
InputAudioFormat = InputAudioFormat.Pcm16,
45+
OutputAudioFormat = OutputAudioFormat.Pcm16
4646
};
4747

4848
// Ensure modalities include audio
4949
sessionOptions.Modalities.Clear();
5050
sessionOptions.Modalities.Add(InputModality.Text);
5151
sessionOptions.Modalities.Add(InputModality.Audio);
5252

53-
await session.ConfigureConversationSessionAsync(sessionOptions).ConfigureAwait(false);
53+
await session.ConfigureSessionAsync(sessionOptions).ConfigureAwait(false);
5454

5555
// Process events from the session
5656
await foreach (SessionUpdate serverEvent in session.GetUpdatesAsync().ConfigureAwait(false))
@@ -92,22 +92,20 @@ public async Task AdvancedVoiceConfiguration()
9292
{
9393
Temperature = 0.8f
9494
},
95-
TurnDetection = new AzureSemanticVad()
95+
TurnDetection = new AzureSemanticVadTurnDetection()
9696
{
97-
NegThreshold = 0.3f,
98-
WindowSize = 300,
9997
RemoveFillerWords = true
10098
},
101-
InputAudioFormat = AudioFormat.Pcm16,
102-
OutputAudioFormat = AudioFormat.Pcm16
99+
InputAudioFormat = InputAudioFormat.Pcm16,
100+
OutputAudioFormat = OutputAudioFormat.Pcm16
103101
};
104102

105103
// Ensure modalities include audio
106104
sessionOptions.Modalities.Clear();
107105
sessionOptions.Modalities.Add(InputModality.Text);
108106
sessionOptions.Modalities.Add(InputModality.Audio);
109107

110-
await session.ConfigureConversationSessionAsync(sessionOptions).ConfigureAwait(false);
108+
await session.ConfigureSessionAsync(sessionOptions).ConfigureAwait(false);
111109
#endregion
112110
}
113111

@@ -148,8 +146,8 @@ public async Task FunctionCallingExample()
148146
Model = model,
149147
Instructions = "You are a weather assistant. Use the get_current_weather function to help users with weather information.",
150148
Voice = new AzureStandardVoice("en-US-AvaNeural"),
151-
InputAudioFormat = AudioFormat.Pcm16,
152-
OutputAudioFormat = AudioFormat.Pcm16
149+
InputAudioFormat = InputAudioFormat.Pcm16,
150+
OutputAudioFormat = OutputAudioFormat.Pcm16
153151
};
154152

155153
// Add the function tool
@@ -160,7 +158,7 @@ public async Task FunctionCallingExample()
160158
sessionOptions.Modalities.Add(InputModality.Text);
161159
sessionOptions.Modalities.Add(InputModality.Audio);
162160

163-
await session.ConfigureConversationSessionAsync(sessionOptions).ConfigureAwait(false);
161+
await session.ConfigureSessionAsync(sessionOptions).ConfigureAwait(false);
164162
#endregion
165163
}
166164
}
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
// Copyright (c) Microsoft Corporation. All rights reserved.
2+
// Licensed under the MIT License.
3+
4+
#nullable disable
5+
6+
using System;
7+
using System.Collections.Generic;
8+
9+
namespace Azure.AI.VoiceLive
10+
{
11+
/// <summary> Configuration for animation outputs including blendshapes, visemes, and emotion metadata. </summary>
12+
public partial class AnimationOptions
13+
{
14+
/// <summary> Interval for emotion detection in milliseconds. If not set, emotion detection is disabled. </summary>
15+
public int? EmotionDetectionIntervalMs { get; set; }
16+
17+
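/// <summary> Interval for emotion detection. If not set, emotion detection is disabled. This is a <see cref="TimeSpan"/> view over <see cref="EmotionDetectionIntervalMs"/>; when set, the value is converted to whole milliseconds and any sub-millisecond fraction is discarded. </summary>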
18+
public TimeSpan? EmotionDetectionInterval
19+
{
20+
get => EmotionDetectionIntervalMs.HasValue ? TimeSpan.FromMilliseconds(EmotionDetectionIntervalMs.Value) : (TimeSpan?)null;
21+
set => EmotionDetectionIntervalMs = value.HasValue ? (int?)value.Value.TotalMilliseconds : null;
22+
}
23+
}
24+
}
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
// Copyright (c) Microsoft Corporation. All rights reserved.
2+
// Licensed under the MIT License.
3+
4+
#nullable disable
5+
6+
using System;
7+
using System.Collections.Generic;
8+
using System.Linq;
9+
10+
namespace Azure.AI.VoiceLive
11+
{
12+
/// <summary> The AssistantMessageItem. </summary>
13+
public partial class AssistantMessageItem : MessageItem
14+
{
15+
/// <summary> Initializes a new instance of <see cref="AssistantMessageItem"/>. </summary>
16+
/// <param name="content"> The single output text content part that becomes the only element of the message's content. </param>
17+
/// <exception cref="ArgumentNullException"> <paramref name="content"/> is null. </exception>
18+
// Convenience overload: wraps a single content part in a one-element array and forwards it to the collection-based constructor.
// The element is null-checked here because `new[] { content }` is itself never null, so a null check on the collection alone
// (presumably what the chained constructor performs — confirm) would not raise the documented ArgumentNullException.
public AssistantMessageItem(OutputTextContentPart content) : this(new[] { content ?? throw new ArgumentNullException(nameof(content)) }) { }
19+
}
20+
}

0 commit comments

Comments
 (0)