Skip to content

Commit b182e11

Browse files
committed
go events
1 parent 8b6184c commit b182e11

File tree

1 file changed

+142
-0
lines changed
  • articles/cognitive-services/Speech-Service/includes/how-to/speech-synthesis

1 file changed

+142
-0
lines changed

articles/cognitive-services/Speech-Service/includes/how-to/speech-synthesis/go.md

Lines changed: 142 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -328,4 +328,146 @@ Next, you need to change the speech synthesis request to reference your XML file
328328
> [!NOTE]
329329
> To set the voice without using SSML, you can set the property on `SpeechConfig` by using `speechConfig.SetSpeechSynthesisVoiceName("en-US-JennyNeural")`.
330330
331+
## Subscribe to synthesizer events
331332

333+
You might want more insight into text-to-speech processing and its results. For example, you might want to know when the synthesizer starts and stops, or you might want to know about other events encountered during synthesis.
334+
335+
While using the [SpeechSynthesizer](https://pkg.go.dev/github.com/Microsoft/cognitive-services-speech-sdk-go/speech#SpeechSynthesizer) for text-to-speech, you can subscribe to the events in this table:
336+
337+
[!INCLUDE [Event types](events.md)]
338+
339+
Here's an example that shows how to subscribe to events for speech synthesis. You can follow the instructions in the [quickstart](../../../get-started-text-to-speech.md?pivots=go), but replace the contents of that `speech-synthesis.go` file with the following Go code.
340+
341+
```go
342+
package main
343+
344+
import (
345+
"fmt"
346+
"os"
347+
"time"
348+
349+
"github.com/Microsoft/cognitive-services-speech-sdk-go/audio"
350+
"github.com/Microsoft/cognitive-services-speech-sdk-go/common"
351+
"github.com/Microsoft/cognitive-services-speech-sdk-go/speech"
352+
)
353+
354+
func bookmarkReachedHandler(event speech.SpeechSynthesisBookmarkEventArgs) {
355+
defer event.Close()
356+
fmt.Println("BookmarkReached event")
357+
}
358+
359+
func synthesisCanceledHandler(event speech.SpeechSynthesisEventArgs) {
360+
defer event.Close()
361+
fmt.Println("SynthesisCanceled event")
362+
}
363+
364+
func synthesisCompletedHandler(event speech.SpeechSynthesisEventArgs) {
365+
defer event.Close()
366+
fmt.Println("SynthesisCompleted event")
367+
fmt.Printf("\tAudioData: %d bytes\n", len(event.Result.AudioData))
368+
fmt.Printf("\tAudioDuration: %d\n", event.Result.AudioDuration)
369+
}
370+
371+
func synthesisStartedHandler(event speech.SpeechSynthesisEventArgs) {
372+
defer event.Close()
373+
fmt.Println("SynthesisStarted event")
374+
}
375+
376+
func synthesizingHandler(event speech.SpeechSynthesisEventArgs) {
377+
defer event.Close()
378+
fmt.Println("Synthesizing event")
379+
fmt.Printf("\tAudioData %d bytes\n", len(event.Result.AudioData))
380+
}
381+
382+
func visemeReceivedHandler(event speech.SpeechSynthesisVisemeEventArgs) {
383+
defer event.Close()
384+
fmt.Println("VisemeReceived event")
385+
fmt.Printf("\tAudioOffset: %dms\n", (event.AudioOffset+5000)/10000)
386+
fmt.Printf("\tVisemeID %d\n", event.VisemeID)
387+
}
388+
389+
func wordBoundaryHandler(event speech.SpeechSynthesisWordBoundaryEventArgs) {
390+
defer event.Close()
391+
fmt.Println("WordBoundary event")
392+
fmt.Printf("\tBoundaryType %d\n", event.BoundaryType)
393+
fmt.Printf("\tAudioOffset: %dms\n", (event.AudioOffset+5000)/10000)
394+
fmt.Printf("\tDuration %d\n", event.Duration)
395+
fmt.Printf("\tText %s\n", event.Text)
396+
fmt.Printf("\tTextOffset %d\n", event.TextOffset)
397+
fmt.Printf("\tWordLength %d\n", event.WordLength)
398+
}
399+
400+
func main() {
401+
speechKey := os.Getenv("SPEECH_KEY")
402+
speechRegion := os.Getenv("SPEECH_REGION")
403+
404+
audioConfig, err := audio.NewAudioConfigFromDefaultSpeakerOutput()
405+
if err != nil {
406+
fmt.Println("Got an error: ", err)
407+
return
408+
}
409+
defer audioConfig.Close()
410+
speechConfig, err := speech.NewSpeechConfigFromSubscription(speechKey, speechRegion)
411+
if err != nil {
412+
fmt.Println("Got an error: ", err)
413+
return
414+
}
415+
defer speechConfig.Close()
416+
417+
speechSynthesizer, err := speech.NewSpeechSynthesizerFromConfig(speechConfig, audioConfig)
418+
if err != nil {
419+
fmt.Println("Got an error: ", err)
420+
return
421+
}
422+
defer speechSynthesizer.Close()
423+
424+
speechSynthesizer.BookmarkReached(bookmarkReachedHandler)
425+
speechSynthesizer.SynthesisCanceled(synthesisCanceledHandler)
426+
speechSynthesizer.SynthesisCompleted(synthesisCompletedHandler)
427+
speechSynthesizer.SynthesisStarted(synthesisStartedHandler)
428+
speechSynthesizer.Synthesizing(synthesizingHandler)
429+
speechSynthesizer.VisemeReceived(visemeReceivedHandler)
430+
speechSynthesizer.WordBoundary(wordBoundaryHandler)
431+
432+
speechSynthesisVoiceName := "en-US-JennyNeural"
433+
434+
ssml := fmt.Sprintf(`<speak version='1.0' xml:lang='en-US' xmlns='http://www.w3.org/2001/10/synthesis' xmlns:mstts='http://www.w3.org/2001/mstts'>
435+
<voice name='%s'>
436+
<mstts:viseme type='redlips_front'/>
437+
The rainbow has seven colors: <bookmark mark='colors_list_begin'/>Red, orange, yellow, green, blue, indigo, and violet.<bookmark mark='colors_list_end'/>.
438+
</voice>
439+
</speak>`, speechSynthesisVoiceName)
440+
441+
// Synthesize the SSML
442+
fmt.Printf("SSML to synthesize: \n\t%s\n", ssml)
443+
task := speechSynthesizer.SpeakSsmlAsync(ssml)
444+
445+
var outcome speech.SpeechSynthesisOutcome
446+
select {
447+
case outcome = <-task:
448+
case <-time.After(60 * time.Second):
449+
fmt.Println("Timed out")
450+
return
451+
}
452+
defer outcome.Close()
453+
if outcome.Error != nil {
454+
fmt.Println("Got an error: ", outcome.Error)
455+
return
456+
}
457+
458+
if outcome.Result.Reason == common.SynthesizingAudioCompleted {
459+
fmt.Println("SynthesizingAudioCompleted result")
460+
} else {
461+
cancellation, _ := speech.NewCancellationDetailsFromSpeechSynthesisResult(outcome.Result)
462+
fmt.Printf("CANCELED: Reason=%d.\n", cancellation.Reason)
463+
464+
if cancellation.Reason == common.Error {
465+
fmt.Printf("CANCELED: ErrorCode=%d\nCANCELED: ErrorDetails=[%s]\nCANCELED: Did you set the speech resource key and region values?\n",
466+
cancellation.ErrorCode,
467+
cancellation.ErrorDetails)
468+
}
469+
}
470+
}
471+
```
472+
473+
You can find more text-to-speech samples at [GitHub](https://github.com/microsoft/cognitive-services-speech-sdk-go/tree/master/samples/).

0 commit comments

Comments
 (0)