3
3
4
4
using System . Threading . Channels ;
5
5
using NAudio . Wave ;
6
+ using Azure . Core . Pipeline ;
6
7
7
8
namespace Azure . AI . VoiceLive . Samples ;
8
9
9
10
/// <summary>
10
11
/// Handles real-time audio capture and playback for the voice assistant.
11
- ///
12
+ ///
12
13
/// This processor demonstrates some of the new VoiceLive SDK convenience methods:
13
14
/// - Uses existing SendInputAudioAsync() method for audio streaming
14
15
/// - Shows how convenience methods simplify audio operations
15
- ///
16
+ ///
16
17
/// Additional convenience methods available in the SDK:
17
18
/// - StartAudioTurnAsync() / AppendAudioToTurnAsync() / EndAudioTurnAsync() - Audio turn management
18
19
/// - ClearStreamingAudioAsync() - Clear all streaming audio
19
20
/// - ConnectAvatarAsync() - Avatar connection with SDP
20
- ///
21
+ ///
21
22
/// Threading Architecture:
22
23
/// - Main thread: Event loop and UI
23
24
/// - Capture thread: NAudio input stream reading
@@ -28,29 +29,29 @@ public class AudioProcessor : IDisposable
28
29
{
29
30
private readonly VoiceLiveSession _session ;
30
31
private readonly ILogger < AudioProcessor > _logger ;
31
-
32
+
32
33
// Audio configuration - PCM16, 24kHz, mono as specified
33
34
private const int SampleRate = 24000 ;
34
35
private const int Channels = 1 ;
35
36
private const int BitsPerSample = 16 ;
36
-
37
+
37
38
// NAudio components
38
39
private WaveInEvent ? _waveIn ;
39
40
private WaveOutEvent ? _waveOut ;
40
41
private BufferedWaveProvider ? _playbackBuffer ;
41
-
42
+
42
43
// Audio capture and playback state
43
44
private bool _isCapturing ;
44
45
private bool _isPlaying ;
45
-
46
+
46
47
// Audio streaming channels
47
48
private readonly Channel < byte [ ] > _audioSendChannel ;
48
49
private readonly Channel < byte [ ] > _audioPlaybackChannel ;
49
50
private readonly ChannelWriter < byte [ ] > _audioSendWriter ;
50
51
private readonly ChannelReader < byte [ ] > _audioSendReader ;
51
52
private readonly ChannelWriter < byte [ ] > _audioPlaybackWriter ;
52
53
private readonly ChannelReader < byte [ ] > _audioPlaybackReader ;
53
-
54
+
54
55
// Background tasks
55
56
private Task ? _audioSendTask ;
56
57
private Task ? _audioPlaybackTask ;
@@ -66,45 +67,45 @@ public AudioProcessor(VoiceLiveSession session, ILogger<AudioProcessor> logger)
66
67
{
67
68
_session = session ?? throw new ArgumentNullException ( nameof ( session ) ) ;
68
69
_logger = logger ?? throw new ArgumentNullException ( nameof ( logger ) ) ;
69
-
70
+
70
71
// Create unbounded channels for audio data
71
72
_audioSendChannel = Channel . CreateUnbounded < byte [ ] > ( ) ;
72
73
_audioSendWriter = _audioSendChannel . Writer ;
73
74
_audioSendReader = _audioSendChannel . Reader ;
74
-
75
+
75
76
_audioPlaybackChannel = Channel . CreateUnbounded < byte [ ] > ( ) ;
76
77
_audioPlaybackWriter = _audioPlaybackChannel . Writer ;
77
78
_audioPlaybackReader = _audioPlaybackChannel . Reader ;
78
-
79
+
79
80
_cancellationTokenSource = new CancellationTokenSource ( ) ;
80
81
_playbackCancellationTokenSource = new CancellationTokenSource ( ) ;
81
82
82
83
_logger . LogInformation ( "AudioProcessor initialized with {SampleRate}Hz PCM16 mono audio" , SampleRate ) ;
83
84
}
84
-
85
+
85
86
/// <summary>
86
87
/// Start capturing audio from microphone.
87
88
/// </summary>
88
89
public Task StartCaptureAsync ( )
89
90
{
90
91
if ( _isCapturing )
91
92
return Task . CompletedTask ;
92
-
93
+
93
94
_isCapturing = true ;
94
-
95
+
95
96
try
96
97
{
97
98
_waveIn = new WaveInEvent
98
99
{
99
100
WaveFormat = new WaveFormat ( SampleRate , BitsPerSample , Channels ) ,
100
101
BufferMilliseconds = 50 // 50ms buffer for low latency
101
102
} ;
102
-
103
+
103
104
_waveIn . DataAvailable += OnAudioDataAvailable ;
104
105
_waveIn . RecordingStopped += OnRecordingStopped ;
105
106
106
107
_logger . LogInformation ( $ "There are { WaveIn . DeviceCount } devices available.") ;
107
- for ( int i = 0 ; i < WaveIn . DeviceCount ; i ++ )
108
+ for ( int i = 0 ; i < WaveIn . DeviceCount ; i ++ )
108
109
{
109
110
var deviceInfo = WaveIn . GetCapabilities ( i ) ;
110
111
@@ -113,10 +114,10 @@ public Task StartCaptureAsync()
113
114
_waveIn . DeviceNumber = 1 ;
114
115
115
116
_waveIn . StartRecording ( ) ;
116
-
117
+
117
118
// Start audio send task
118
119
_audioSendTask = ProcessAudioSendAsync ( _cancellationTokenSource . Token ) ;
119
-
120
+
120
121
_logger . LogInformation ( "Started audio capture" ) ;
121
122
return Task . CompletedTask ;
122
123
}
@@ -127,17 +128,17 @@ public Task StartCaptureAsync()
127
128
throw ;
128
129
}
129
130
}
130
-
131
+
131
132
/// <summary>
132
133
/// Stop capturing audio.
133
134
/// </summary>
134
135
public async Task StopCaptureAsync ( )
135
136
{
136
137
if ( ! _isCapturing )
137
138
return ;
138
-
139
+
139
140
_isCapturing = false ;
140
-
141
+
141
142
if ( _waveIn != null )
142
143
{
143
144
_waveIn . StopRecording ( ) ;
@@ -146,49 +147,49 @@ public async Task StopCaptureAsync()
146
147
_waveIn . Dispose ( ) ;
147
148
_waveIn = null ;
148
149
}
149
-
150
+
150
151
// Complete the send channel and wait for the send task
151
152
_audioSendWriter . TryComplete ( ) ;
152
153
if ( _audioSendTask != null )
153
154
{
154
155
await _audioSendTask . ConfigureAwait ( false ) ;
155
156
_audioSendTask = null ;
156
157
}
157
-
158
+
158
159
_logger . LogInformation ( "Stopped audio capture" ) ;
159
160
}
160
-
161
+
161
162
/// <summary>
162
163
/// Initialize audio playback system.
163
164
/// </summary>
164
165
public Task StartPlaybackAsync ( )
165
166
{
166
167
if ( _isPlaying )
167
168
return Task . CompletedTask ;
168
-
169
+
169
170
_isPlaying = true ;
170
-
171
+
171
172
try
172
173
{
173
174
_waveOut = new WaveOutEvent
174
175
{
175
176
DesiredLatency = 100 // 100ms latency
176
177
} ;
177
-
178
+
178
179
_playbackBuffer = new BufferedWaveProvider ( new WaveFormat ( SampleRate , BitsPerSample , Channels ) )
179
180
{
180
181
BufferDuration = TimeSpan . FromMinutes ( 5 ) , // 5 minute buffer
181
182
DiscardOnBufferOverflow = true
182
183
} ;
183
-
184
+
184
185
_waveOut . Init ( _playbackBuffer ) ;
185
186
_waveOut . Play ( ) ;
186
187
187
188
_playbackCancellationTokenSource = new CancellationTokenSource ( ) ;
188
189
189
190
// Start audio playback task
190
191
_audioPlaybackTask = ProcessAudioPlaybackAsync ( ) ;
191
-
192
+
192
193
_logger . LogInformation ( "Audio playback system ready" ) ;
193
194
return Task . CompletedTask ;
194
195
}
@@ -199,34 +200,35 @@ public Task StartPlaybackAsync()
199
200
throw ;
200
201
}
201
202
}
202
-
203
+
203
204
/// <summary>
204
205
/// Stop audio playback and clear buffer.
205
206
/// </summary>
206
207
public async Task StopPlaybackAsync ( )
207
208
{
208
209
if ( ! _isPlaying )
209
210
return ;
210
-
211
+
211
212
_isPlaying = false ;
212
-
213
+
213
214
// Clear the playback channel
214
- while ( _audioPlaybackReader . TryRead ( out _ ) ) { }
215
-
215
+ while ( _audioPlaybackReader . TryRead ( out _ ) )
216
+ { }
217
+
216
218
if ( _playbackBuffer != null )
217
219
{
218
220
_playbackBuffer . ClearBuffer ( ) ;
219
221
}
220
-
222
+
221
223
if ( _waveOut != null )
222
224
{
223
225
_waveOut . Stop ( ) ;
224
226
_waveOut . Dispose ( ) ;
225
227
_waveOut = null ;
226
228
}
227
-
229
+
228
230
_playbackBuffer = null ;
229
-
231
+
230
232
// Cancel the playback token and wait for the playback task
231
233
_playbackCancellationTokenSource . Cancel ( ) ;
232
234
@@ -235,10 +237,10 @@ public async Task StopPlaybackAsync()
235
237
await _audioPlaybackTask . ConfigureAwait ( false ) ;
236
238
_audioPlaybackTask = null ;
237
239
}
238
-
240
+
239
241
_logger . LogInformation ( "Stopped audio playback" ) ;
240
242
}
241
-
243
+
242
244
/// <summary>
243
245
/// Queue audio data for playback.
244
246
/// </summary>
@@ -250,7 +252,7 @@ public async Task QueueAudioAsync(byte[] audioData)
250
252
await _audioPlaybackWriter . WriteAsync ( audioData ) . ConfigureAwait ( false ) ;
251
253
}
252
254
}
253
-
255
+
254
256
/// <summary>
255
257
/// Event handler for audio data available from microphone.
256
258
/// </summary>
@@ -260,15 +262,15 @@ private void OnAudioDataAvailable(object? sender, WaveInEventArgs e)
260
262
{
261
263
byte [ ] audioData = new byte [ e . BytesRecorded ] ;
262
264
Array . Copy ( e . Buffer , 0 , audioData , 0 , e . BytesRecorded ) ;
263
-
265
+
264
266
// Queue audio data for sending (non-blocking)
265
267
if ( ! _audioSendWriter . TryWrite ( audioData ) )
266
268
{
267
269
_logger . LogWarning ( "Failed to queue audio data for sending - channel may be full" ) ;
268
270
}
269
271
}
270
272
}
271
-
273
+
272
274
/// <summary>
273
275
/// Event handler for recording stopped.
274
276
/// </summary>
@@ -279,19 +281,19 @@ private void OnRecordingStopped(object? sender, StoppedEventArgs e)
279
281
_logger . LogError ( e . Exception , "Audio recording stopped due to error" ) ;
280
282
}
281
283
}
282
-
284
+
283
285
/// <summary>
284
286
/// Background task to process audio data and send to VoiceLive service.
285
287
/// </summary>
286
288
private async Task ProcessAudioSendAsync ( CancellationToken cancellationToken )
287
289
{
288
290
try
289
291
{
290
- await foreach ( byte [ ] audioData in _audioSendReader . ReadAllAsync ( cancellationToken ) )
292
+ await foreach ( byte [ ] audioData in _audioSendReader . ReadAllAsync ( cancellationToken ) . ConfigureAwait ( false ) )
291
293
{
292
294
if ( cancellationToken . IsCancellationRequested )
293
295
break ;
294
-
296
+
295
297
try
296
298
{
297
299
// Send audio data directly to the session using the convenience method
@@ -315,7 +317,7 @@ private async Task ProcessAudioSendAsync(CancellationToken cancellationToken)
315
317
_logger . LogError ( ex , "Error in audio send processing" ) ;
316
318
}
317
319
}
318
-
320
+
319
321
/// <summary>
320
322
/// Background task to process audio playback.
321
323
/// </summary>
@@ -326,11 +328,11 @@ private async Task ProcessAudioPlaybackAsync()
326
328
CancellationTokenSource combinedTokenSource = CancellationTokenSource . CreateLinkedTokenSource ( _playbackCancellationTokenSource . Token , _cancellationTokenSource . Token ) ;
327
329
var cancellationToken = combinedTokenSource . Token ;
328
330
329
- await foreach ( byte [ ] audioData in _audioPlaybackReader . ReadAllAsync ( cancellationToken ) )
331
+ await foreach ( byte [ ] audioData in _audioPlaybackReader . ReadAllAsync ( cancellationToken ) . ConfigureAwait ( false ) )
330
332
{
331
333
if ( cancellationToken . IsCancellationRequested )
332
334
break ;
333
-
335
+
334
336
try
335
337
{
336
338
if ( _playbackBuffer != null && _isPlaying )
@@ -354,36 +356,38 @@ private async Task ProcessAudioPlaybackAsync()
354
356
_logger . LogError ( ex , "Error in audio playback processing" ) ;
355
357
}
356
358
}
357
-
359
+
358
360
/// <summary>
/// Stops capture and playback, signals the processor-wide cancellation token,
/// and then awaits whichever background tasks are still referenced.
/// </summary>
/// <returns>A task that completes once the shutdown sequence has finished.</returns>
public async Task CleanupAsync()
{
    // Shut down both audio directions before signalling cancellation.
    await StopCaptureAsync().ConfigureAwait(false);
    await StopPlaybackAsync().ConfigureAwait(false);

    _cancellationTokenSource.Cancel();

    // Gather any background work that is still being tracked.
    var pending = new List<Task>();
    foreach (Task? candidate in new Task?[] { _audioSendTask, _audioPlaybackTask })
    {
        if (candidate != null)
        {
            pending.Add(candidate);
        }
    }

    if (pending.Count != 0)
    {
        await Task.WhenAll(pending).ConfigureAwait(false);
    }

    _logger.LogInformation("Audio processor cleaned up");
}
380
-
384
+
381
385
/// <summary>
/// Dispose of resources.
/// </summary>
/// <remarks>
/// Blocks the calling thread until <see cref="CleanupAsync"/> has finished.
/// Because the wait uses <see cref="System.Threading.Tasks.Task.Wait()"/>, a failure
/// during cleanup surfaces as an <see cref="AggregateException"/>.
/// Both cancellation token sources owned by this instance are released even
/// when cleanup fails.
/// </remarks>
public void Dispose()
{
    try
    {
        CleanupAsync().Wait();
    }
    finally
    {
        // Release both token sources; the playback source was previously never disposed.
        _cancellationTokenSource.Dispose();
        _playbackCancellationTokenSource.Dispose();
    }
}
389
393
}
0 commit comments