java synthesizer events etc

eric-urban · eric-urban · commit 62f8cc6cc69a · 2022-09-30T08:07:07.000-07:00
diff --git a/articles/cognitive-services/Speech-Service/includes/how-to/speech-synthesis/java.md b/articles/cognitive-services/Speech-Service/includes/how-to/speech-synthesis/java.md
@@ -198,17 +198,113 @@ public static void main(String[] args) {
 
 ## Subscribe to synthesizer events
 
+You might want more insight into text-to-speech processing and its results. For example, you might want to know when the synthesizer starts and stops, or learn about other events encountered during synthesis.
+
 While using the [SpeechSynthesizer](/java/api/com.microsoft.cognitiveservices.speech.speechsynthesizer) for text-to-speech, you can subscribe to the events in this table:
 
 [!INCLUDE [Event types](events.md)]
 
-Here's an example that shows how to subscribe to the `BookmarkReached` event for speech synthesis. 
+Here's an example that shows how to subscribe to events for speech synthesis. You can follow the instructions in the [quickstart](../../../get-started-text-to-speech.md?pivots=java), but replace the contents of that `SpeechSynthesis.java` file with the following Java code.
 
 ```java
-speechSynthesizer.BookmarkReached.addEventListener((o, e) -> {
-    System.out.print("Bookmark reached. Audio offset: " + e.getAudioOffset() / 10000 + "ms, ");
-    System.out.println("bookmark text: " + e.getText() + ".");
-});
+import com.microsoft.cognitiveservices.speech.*;
+import com.microsoft.cognitiveservices.speech.audio.*;
+
+import java.util.Scanner;
+import java.util.concurrent.ExecutionException;
+
+/** Sample that synthesizes SSML with the Speech SDK and prints each synthesizer event to the console. */
+public class SpeechSynthesis {
+    private static final String speechKey = System.getenv("SPEECH_KEY");
+    private static final String speechRegion = System.getenv("SPEECH_REGION");
+
+    /** Subscribes to the synthesizer events, speaks the SSML, and reports how synthesis ended. */
+    public static void main(String[] args) throws InterruptedException, ExecutionException {
+        SpeechConfig speechConfig = SpeechConfig.fromSubscription(speechKey, speechRegion);
+
+        String speechSynthesisVoiceName = "en-US-JennyNeural";
+        // The <bookmark> elements mark the positions that the BookmarkReached handler below reports.
+        String ssml = "<speak version='1.0' xml:lang='en-US' xmlns='http://www.w3.org/2001/10/synthesis' xmlns:mstts='http://www.w3.org/2001/mstts'>"
+            .concat(String.format("<voice name='%s'>", speechSynthesisVoiceName))
+            .concat("<mstts:viseme type='redlips_front'/>")
+            .concat("The rainbow has seven colors: <bookmark mark='colors_list_begin'/>Red, orange, yellow, green, blue, indigo, and violet.<bookmark mark='colors_list_end'/>.")
+            .concat("</voice>")
+            .concat("</speak>");
+
+        // try-with-resources closes the synthesizer even when synthesis throws.
+        try (SpeechSynthesizer speechSynthesizer = new SpeechSynthesizer(speechConfig)) {
+            // Subscribe to events. Audio offsets are rounded ((offset + 5000) / 10000) to print milliseconds.
+
+            speechSynthesizer.BookmarkReached.addEventListener((o, e) -> {
+                System.out.println("BookmarkReached event:");
+                System.out.println("\tAudioOffset: " + ((e.getAudioOffset() + 5000) / 10000) + "ms");
+                System.out.println("\tText: " + e.getText());
+            });
+
+            speechSynthesizer.SynthesisCanceled.addEventListener((o, e) -> {
+                System.out.println("SynthesisCanceled event");
+            });
+
+            speechSynthesizer.SynthesisCompleted.addEventListener((o, e) -> {
+                SpeechSynthesisResult result = e.getResult();
+                byte[] audioData = result.getAudioData();
+                System.out.println("SynthesisCompleted event:");
+                System.out.println("\tAudioData: " + audioData.length + " bytes");
+                System.out.println("\tAudioDuration: " + result.getAudioDuration());
+                result.close();
+            });
+
+            speechSynthesizer.SynthesisStarted.addEventListener((o, e) -> {
+                System.out.println("SynthesisStarted event");
+            });
+
+            speechSynthesizer.Synthesizing.addEventListener((o, e) -> {
+                SpeechSynthesisResult result = e.getResult();
+                byte[] audioData = result.getAudioData();
+                System.out.println("Synthesizing event:");
+                System.out.println("\tAudioData: " + audioData.length + " bytes");
+                result.close();
+            });
+
+            speechSynthesizer.VisemeReceived.addEventListener((o, e) -> {
+                System.out.println("VisemeReceived event:");
+                System.out.println("\tAudioOffset: " + ((e.getAudioOffset() + 5000) / 10000) + "ms");
+                System.out.println("\tVisemeId: " + e.getVisemeId());
+            });
+
+            speechSynthesizer.WordBoundary.addEventListener((o, e) -> {
+                System.out.println("WordBoundary event:");
+                System.out.println("\tBoundaryType: " + e.getBoundaryType());
+                System.out.println("\tAudioOffset: " + ((e.getAudioOffset() + 5000) / 10000) + "ms");
+                System.out.println("\tDuration: " + e.getDuration());
+                System.out.println("\tText: " + e.getText());
+                System.out.println("\tTextOffset: " + e.getTextOffset());
+                System.out.println("\tWordLength: " + e.getWordLength());
+            });
+
+            // Synthesize the SSML; the result is closed automatically once it has been reported.
+            System.out.println("SSML to synthesize:");
+            System.out.println(ssml);
+            try (SpeechSynthesisResult speechSynthesisResult = speechSynthesizer.SpeakSsmlAsync(ssml).get()) {
+                if (speechSynthesisResult.getReason() == ResultReason.SynthesizingAudioCompleted) {
+                    System.out.println("SynthesizingAudioCompleted result");
+                }
+                else if (speechSynthesisResult.getReason() == ResultReason.Canceled) {
+                    SpeechSynthesisCancellationDetails cancellation = SpeechSynthesisCancellationDetails.fromResult(speechSynthesisResult);
+                    System.out.println("CANCELED: Reason=" + cancellation.getReason());
+
+                    if (cancellation.getReason() == CancellationReason.Error) {
+                        System.out.println("CANCELED: ErrorCode=" + cancellation.getErrorCode());
+                        System.out.println("CANCELED: ErrorDetails=" + cancellation.getErrorDetails());
+                        System.out.println("CANCELED: Did you set the speech resource key and region values?");
+                    }
+                }
+            }
+        }
+
+        System.exit(0);
+    }
+}
 ```
 
 You can find more text-to-speech samples at [GitHub](https://aka.ms/csspeech/samples).
diff --git a/articles/cognitive-services/Speech-Service/includes/how-to/speech-synthesis/python.md b/articles/cognitive-services/Speech-Service/includes/how-to/speech-synthesis/python.md
@@ -164,40 +164,44 @@ import os
 import azure.cognitiveservices.speech as speechsdk
 
 def speech_synthesizer_bookmark_reached_cb(evt: speechsdk.SessionEventArgs):
-    print('BookmarkReached event')
+    print('BookmarkReached event:')
     print('\tAudioOffset: {}ms'.format((evt.audio_offset + 5000) / 10000))
     print('\tText: {}'.format(evt.text))
 
 def speech_synthesizer_synthesis_canceled_cb(evt: speechsdk.SessionEventArgs):
     print('SynthesisCanceled event')
 
 def speech_synthesizer_synthesis_completed_cb(evt: speechsdk.SessionEventArgs):
-    print('SynthesisCompleted event')
+    print('SynthesisCompleted event:')
     print('\tAudioData: {} bytes'.format(len(evt.result.audio_data)))
     print('\tAudioDuration: {}'.format(evt.result.audio_duration))
 
 def speech_synthesizer_synthesis_started_cb(evt: speechsdk.SessionEventArgs):
     print('SynthesisStarted event')
 
+def speech_synthesizer_synthesizing_cb(evt: speechsdk.SessionEventArgs):
+    print('Synthesizing event:')
+    print('\tAudioData: {} bytes'.format(len(evt.result.audio_data)))
+
+def speech_synthesizer_viseme_received_cb(evt: speechsdk.SessionEventArgs):
+    print('VisemeReceived event:')
+    print('\tAudioOffset: {}ms'.format((evt.audio_offset + 5000) / 10000))
+    print('\tVisemeId: {}'.format(evt.viseme_id))
+
 def speech_synthesizer_word_boundary_cb(evt: speechsdk.SessionEventArgs):
-    print('WordBoundary event')
+    print('WordBoundary event:')
     print('\tBoundaryType: {}'.format(evt.boundary_type))
     print('\tAudioOffset: {}ms'.format((evt.audio_offset + 5000) / 10000))
     print('\tDuration: {}'.format(evt.duration))
     print('\tText: {}'.format(evt.text))
     print('\tTextOffset: {}'.format(evt.text_offset))
     print('\tWordLength: {}'.format(evt.word_length))
 
-def speech_synthesizer_synthesizing_cb(evt: speechsdk.SessionEventArgs):
-    print('Synthesizing event')
-    print('\tAudioData: {} bytes'.format(len(evt.result.audio_data)))
+speech_config = speechsdk.SpeechConfig(subscription=os.environ.get('SPEECH_KEY'), region=os.environ.get('SPEECH_REGION'))
 
-def speech_synthesizer_viseme_received_cb(evt: speechsdk.SessionEventArgs):
-    print('VisemeReceived event')
-    print('\tAudioOffset: {}ms'.format((evt.audio_offset + 5000) / 10000))
-    print('\tVisemeId: {}'.format(evt.viseme_id))
+# Request sentence boundaries so that WordBoundary events are also raised for sentences.
+speech_config.set_property(property_id=speechsdk.PropertyId.SpeechServiceResponse_RequestSentenceBoundary, value='true')
 
-speech_config = speechsdk.SpeechConfig(subscription=os.environ.get('SPEECH_KEY'), region=os.environ.get('SPEECH_REGION'))
 audio_config = speechsdk.audio.AudioOutputConfig(use_default_speaker=True)
 speech_synthesizer = speechsdk.SpeechSynthesizer(speech_config=speech_config, audio_config=audio_config)
 
@@ -206,12 +210,9 @@ speech_synthesizer.bookmark_reached.connect(speech_synthesizer_bookmark_reached_
 speech_synthesizer.synthesis_canceled.connect(speech_synthesizer_synthesis_canceled_cb)
 speech_synthesizer.synthesis_completed.connect(speech_synthesizer_synthesis_completed_cb)
 speech_synthesizer.synthesis_started.connect(speech_synthesizer_synthesis_started_cb)
-speech_synthesizer.synthesis_word_boundary.connect(speech_synthesizer_word_boundary_cb)
 speech_synthesizer.synthesizing.connect(speech_synthesizer_synthesizing_cb)
 speech_synthesizer.viseme_received.connect(speech_synthesizer_viseme_received_cb)
-
-# Required for WordBoundary event sentences.
-speech_config.set_property(property_id=speechsdk.PropertyId.SpeechServiceResponse_RequestSentenceBoundary, value='true')
+speech_synthesizer.synthesis_word_boundary.connect(speech_synthesizer_word_boundary_cb)
 
 # The language of the voice that speaks.
 speech_synthesis_voice_name='en-US-JennyNeural'