@@ -157,11 +157,85 @@ While using the [SpeechSynthesizer](/python/api/azure-cognitiveservices-speech/a
157
157
158
158
[!INCLUDE [Event types](events.md)]
159
159
160
- Here's an example that shows how to subscribe to the ` BookmarkReached ` event for speech synthesis .
160
+ Here's an example that shows how to subscribe to events for speech synthesis. You can follow the instructions in the [quickstart](../../../get-started-text-to-speech.md?pivots=python), but replace the contents of that `speech_synthesis.py` file with the following Python code.
161
161
162
162
``` python
163
- speech_synthesizer.bookmark_reached.connect(lambda evt : print (
164
- " Bookmark reached: {} , audio offset: {} ms, bookmark text: {} ." .format(evt, evt.audio_offset / 10000 , evt.text)))
163
+ import os
164
+ import azure.cognitiveservices.speech as speechsdk
165
+
166
def speech_synthesizer_bookmark_reached_cb(evt: speechsdk.SpeechSynthesisBookmarkEventArgs):
    """Callback invoked when a <bookmark/> tag in the SSML is reached during synthesis."""
    print('BookmarkReached event')
    # audio_offset is in 100-nanosecond ticks; adding 5000 rounds to the nearest millisecond.
    print('\tAudioOffset: {}ms'.format((evt.audio_offset + 5000) / 10000))
    print('\tText: {}'.format(evt.text))

171
def speech_synthesizer_synthesis_canceled_cb(evt: speechsdk.SpeechSynthesisEventArgs):
    """Callback invoked when speech synthesis is canceled."""
    print('SynthesisCanceled event')

174
def speech_synthesizer_synthesis_completed_cb(evt: speechsdk.SpeechSynthesisEventArgs):
    """Callback invoked when synthesis completes and the full audio result is available."""
    print('SynthesisCompleted event')
    print('\tAudioData: {} bytes'.format(len(evt.result.audio_data)))
    print('\tAudioDuration: {}'.format(evt.result.audio_duration))

179
def speech_synthesizer_synthesis_started_cb(evt: speechsdk.SpeechSynthesisEventArgs):
    """Callback invoked when speech synthesis starts."""
    print('SynthesisStarted event')

182
def speech_synthesizer_word_boundary_cb(evt: speechsdk.SpeechSynthesisWordBoundaryEventArgs):
    """Callback invoked at each word, punctuation, or sentence boundary in the output audio."""
    print('WordBoundary event')
    print('\tBoundaryType: {}'.format(evt.boundary_type))
    # audio_offset is in 100-nanosecond ticks; adding 5000 rounds to the nearest millisecond.
    print('\tAudioOffset: {}ms'.format((evt.audio_offset + 5000) / 10000))
    print('\tDuration: {}'.format(evt.duration))
    print('\tText: {}'.format(evt.text))
    print('\tTextOffset: {}'.format(evt.text_offset))
    print('\tWordLength: {}'.format(evt.word_length))

191
def speech_synthesizer_synthesizing_cb(evt: speechsdk.SpeechSynthesisEventArgs):
    """Callback invoked repeatedly as audio chunks stream back during synthesis."""
    print('Synthesizing event')
    print('\tAudioData: {} bytes'.format(len(evt.result.audio_data)))

195
def speech_synthesizer_viseme_received_cb(evt: speechsdk.SpeechSynthesisVisemeEventArgs):
    """Callback invoked when a viseme (mouth-shape) event is received."""
    print('VisemeReceived event')
    # audio_offset is in 100-nanosecond ticks; adding 5000 rounds to the nearest millisecond.
    print('\tAudioOffset: {}ms'.format((evt.audio_offset + 5000) / 10000))
    print('\tVisemeId: {}'.format(evt.viseme_id))

200
# This example requires environment variables named "SPEECH_KEY" and "SPEECH_REGION".
speech_config = speechsdk.SpeechConfig(subscription=os.environ.get('SPEECH_KEY'), region=os.environ.get('SPEECH_REGION'))

# Required for WordBoundary event sentences.
# This must be set BEFORE the SpeechSynthesizer is created: the synthesizer
# captures the configuration at construction time, so setting it afterwards
# has no effect on an existing synthesizer.
speech_config.set_property(property_id=speechsdk.PropertyId.SpeechServiceResponse_RequestSentenceBoundary, value='true')

audio_config = speechsdk.audio.AudioOutputConfig(use_default_speaker=True)
speech_synthesizer = speechsdk.SpeechSynthesizer(speech_config=speech_config, audio_config=audio_config)

# Subscribe to events
speech_synthesizer.bookmark_reached.connect(speech_synthesizer_bookmark_reached_cb)
speech_synthesizer.synthesis_canceled.connect(speech_synthesizer_synthesis_canceled_cb)
speech_synthesizer.synthesis_completed.connect(speech_synthesizer_synthesis_completed_cb)
speech_synthesizer.synthesis_started.connect(speech_synthesizer_synthesis_started_cb)
speech_synthesizer.synthesis_word_boundary.connect(speech_synthesizer_word_boundary_cb)
speech_synthesizer.synthesizing.connect(speech_synthesizer_synthesizing_cb)
speech_synthesizer.viseme_received.connect(speech_synthesizer_viseme_received_cb)

# The language of the voice that speaks. The voice is selected via the SSML
# <voice> element below, so it is not set on speech_config here.
speech_synthesis_voice_name = 'en-US-JennyNeural'

ssml = """<speak version='1.0' xml:lang='en-US' xmlns='http://www.w3.org/2001/10/synthesis' xmlns:mstts='http://www.w3.org/2001/mstts'>
    <voice name='{}'>
        <mstts:viseme type='redlips_front'/>
        The rainbow has seven colors: <bookmark mark='colors_list_begin'/>Red, orange, yellow, green, blue, indigo, and violet.<bookmark mark='colors_list_end'/>.
    </voice>
</speak>""".format(speech_synthesis_voice_name)

# Synthesize the SSML and block until the result is available.
print("SSML to synthesize: \r\n{}".format(ssml))
speech_synthesis_result = speech_synthesizer.speak_ssml_async(ssml).get()

if speech_synthesis_result.reason == speechsdk.ResultReason.SynthesizingAudioCompleted:
    print("SynthesizingAudioCompleted result")
elif speech_synthesis_result.reason == speechsdk.ResultReason.Canceled:
    cancellation_details = speech_synthesis_result.cancellation_details
    print("Speech synthesis canceled: {}".format(cancellation_details.reason))
    if cancellation_details.reason == speechsdk.CancellationReason.Error:
        if cancellation_details.error_details:
            print("Error details: {}".format(cancellation_details.error_details))
            print("Did you set the speech resource key and region values?")
165
239
```
166
240
167
241
You can find more text-to-speech samples at [GitHub](https://aka.ms/csspeech/samples).
0 commit comments