Skip to content

Commit b5987d6

Browse files
committed
python synthesizer events
1 parent 2291f62 commit b5987d6

File tree

1 file changed

+77
-3
lines changed
  • articles/cognitive-services/Speech-Service/includes/how-to/speech-synthesis

1 file changed

+77
-3
lines changed

articles/cognitive-services/Speech-Service/includes/how-to/speech-synthesis/python.md

Lines changed: 77 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -157,11 +157,85 @@ While using the [SpeechSynthesizer](/python/api/azure-cognitiveservices-speech/a
157157

158158
[!INCLUDE [Event types](events.md)]
159159

160-
Here's an example that shows how to subscribe to the `BookmarkReached` event for speech synthesis.
160+
Here's an example that shows how to subscribe to events for speech synthesis. You can follow the instructions in the [quickstart](../../../get-started-text-to-speech.md?pivots=python), but replace the contents of that `speech-synthesis.py` file with the following Python code.
161161

162162
```python
163-
speech_synthesizer.bookmark_reached.connect(lambda evt: print(
164-
"Bookmark reached: {}, audio offset: {}ms, bookmark text: {}.".format(evt, evt.audio_offset / 10000, evt.text)))
163+
import os
import azure.cognitiveservices.speech as speechsdk


def speech_synthesizer_bookmark_reached_cb(evt: speechsdk.SessionEventArgs):
    """Fires when a <bookmark/> element in the SSML is reached during synthesis."""
    print('BookmarkReached event')
    # audio_offset is in 100-nanosecond ticks; +5000 rounds to the nearest millisecond.
    print('\tAudioOffset: {}ms'.format((evt.audio_offset + 5000) / 10000))
    print('\tText: {}'.format(evt.text))


def speech_synthesizer_synthesis_canceled_cb(evt: speechsdk.SessionEventArgs):
    """Fires when synthesis is canceled (for example, on an error)."""
    print('SynthesisCanceled event')


def speech_synthesizer_synthesis_completed_cb(evt: speechsdk.SessionEventArgs):
    """Fires once when synthesis has finished and all audio is available."""
    print('SynthesisCompleted event')
    print('\tAudioData: {} bytes'.format(len(evt.result.audio_data)))
    print('\tAudioDuration: {}'.format(evt.result.audio_duration))


def speech_synthesizer_synthesis_started_cb(evt: speechsdk.SessionEventArgs):
    """Fires once when synthesis begins."""
    print('SynthesisStarted event')


def speech_synthesizer_word_boundary_cb(evt: speechsdk.SessionEventArgs):
    """Fires at each word (or sentence/punctuation) boundary in the synthesized speech."""
    print('WordBoundary event')
    print('\tBoundaryType: {}'.format(evt.boundary_type))
    # audio_offset is in 100-nanosecond ticks; +5000 rounds to the nearest millisecond.
    print('\tAudioOffset: {}ms'.format((evt.audio_offset + 5000) / 10000))
    print('\tDuration: {}'.format(evt.duration))
    print('\tText: {}'.format(evt.text))
    print('\tTextOffset: {}'.format(evt.text_offset))
    print('\tWordLength: {}'.format(evt.word_length))


def speech_synthesizer_synthesizing_cb(evt: speechsdk.SessionEventArgs):
    """Fires repeatedly as chunks of synthesized audio become available."""
    print('Synthesizing event')
    print('\tAudioData: {} bytes'.format(len(evt.result.audio_data)))


def speech_synthesizer_viseme_received_cb(evt: speechsdk.SessionEventArgs):
    """Fires for each viseme (visual phoneme) produced during synthesis."""
    print('VisemeReceived event')
    # audio_offset is in 100-nanosecond ticks; +5000 rounds to the nearest millisecond.
    print('\tAudioOffset: {}ms'.format((evt.audio_offset + 5000) / 10000))
    print('\tVisemeId: {}'.format(evt.viseme_id))


speech_config = speechsdk.SpeechConfig(subscription=os.environ.get('SPEECH_KEY'), region=os.environ.get('SPEECH_REGION'))

# Required for WordBoundary events at sentence boundaries.
# Set this on the config BEFORE creating the synthesizer — the original sample
# set it after construction, where it has no effect on the already-built synthesizer.
speech_config.set_property(property_id=speechsdk.PropertyId.SpeechServiceResponse_RequestSentenceBoundary, value='true')

audio_config = speechsdk.audio.AudioOutputConfig(use_default_speaker=True)
speech_synthesizer = speechsdk.SpeechSynthesizer(speech_config=speech_config, audio_config=audio_config)

# Subscribe to events
speech_synthesizer.bookmark_reached.connect(speech_synthesizer_bookmark_reached_cb)
speech_synthesizer.synthesis_canceled.connect(speech_synthesizer_synthesis_canceled_cb)
speech_synthesizer.synthesis_completed.connect(speech_synthesizer_synthesis_completed_cb)
speech_synthesizer.synthesis_started.connect(speech_synthesizer_synthesis_started_cb)
speech_synthesizer.synthesis_word_boundary.connect(speech_synthesizer_word_boundary_cb)
speech_synthesizer.synthesizing.connect(speech_synthesizer_synthesizing_cb)
speech_synthesizer.viseme_received.connect(speech_synthesizer_viseme_received_cb)

# The language of the voice that speaks.
speech_synthesis_voice_name = 'en-US-JennyNeural'

# SSML containing a viseme request and two bookmarks that trigger BookmarkReached events.
ssml = """<speak version='1.0' xml:lang='en-US' xmlns='http://www.w3.org/2001/10/synthesis' xmlns:mstts='http://www.w3.org/2001/mstts'>
    <voice name='{}'>
        <mstts:viseme type='redlips_front'/>
        The rainbow has seven colors: <bookmark mark='colors_list_begin'/>Red, orange, yellow, green, blue, indigo, and violet.<bookmark mark='colors_list_end'/>.
    </voice>
</speak>""".format(speech_synthesis_voice_name)

# Synthesize the SSML
print("SSML to synthesize: \r\n{}".format(ssml))
speech_synthesis_result = speech_synthesizer.speak_ssml_async(ssml).get()

if speech_synthesis_result.reason == speechsdk.ResultReason.SynthesizingAudioCompleted:
    print("SynthesizingAudioCompleted result")
elif speech_synthesis_result.reason == speechsdk.ResultReason.Canceled:
    cancellation_details = speech_synthesis_result.cancellation_details
    print("Speech synthesis canceled: {}".format(cancellation_details.reason))
    if cancellation_details.reason == speechsdk.CancellationReason.Error:
        if cancellation_details.error_details:
            print("Error details: {}".format(cancellation_details.error_details))
            print("Did you set the speech resource key and region values?")
165239
```
166240

167241
You can find more text-to-speech samples at [GitHub](https://aka.ms/csspeech/samples).

0 commit comments

Comments
 (0)