Merge pull request #256579 from niuzheng168/master

PMEds28 · web-flow · commit 059970d5f642 · 2023-10-30T10:35:31.000Z
Update openai-speech sample code
diff --git a/articles/ai-services/speech-service/includes/quickstarts/openai-speech/csharp.md b/articles/ai-services/speech-service/includes/quickstarts/openai-speech/csharp.md
@@ -41,7 +41,7 @@ Follow these steps to create a new console application.
     ```
 1. Replace the contents of `Program.cs` with the following code. 
 
-    ```csharp
+ ```csharp
     using System;
     using System.IO;
     using System.Threading.Tasks;
@@ -64,42 +64,67 @@ Follow these steps to create a new console application.
         // This example requires environment variables named "SPEECH_KEY" and "SPEECH_REGION"
         static string speechKey = Environment.GetEnvironmentVariable("SPEECH_KEY");
         static string speechRegion = Environment.GetEnvironmentVariable("SPEECH_REGION");
-    
+
+        // Sentence end symbols for splitting the response into sentences.
+        static List<string> sentenceSaperators = new() { ".", "!", "?", ";", "。", "！", "？", "；", "\n" };
+
+        private static object consoleLock = new();
+
         // Prompts Azure OpenAI with a request and synthesizes the response.
         async static Task AskOpenAI(string prompt)
         {
+            var speechConfig = SpeechConfig.FromSubscription(speechKey, speechRegion);
+            // The language of the voice that speaks.
+            speechConfig.SpeechSynthesisVoiceName = "en-US-JennyMultilingualNeural";
+            var audioOutputConfig = AudioConfig.FromDefaultSpeakerOutput();
+            using var speechSynthesizer = new SpeechSynthesizer(speechConfig, audioOutputConfig);
+            speechSynthesizer.Synthesizing += (sender, args) =>
+            {
+                lock (consoleLock)
+                {
+                    Console.ForegroundColor = ConsoleColor.Yellow;
+                    Console.Write($"[Audio]");
+                    Console.ResetColor();
+                }
+            };
+            
             // Ask Azure OpenAI
             OpenAIClient client = new(new Uri(openAIEndpoint), new AzureKeyCredential(openAIKey));
             var completionsOptions = new CompletionsOptions()
             {
                 Prompts = { prompt },
                 MaxTokens = 100,
+                
             };
-            Response<Completions> completionsResponse = client.GetCompletions(engine, completionsOptions);
-            string text = completionsResponse.Value.Choices[0].Text.Trim();
-            Console.WriteLine($"Azure OpenAI response: {text}");
-     
-            var speechConfig = SpeechConfig.FromSubscription(speechKey, speechRegion);
-            // The language of the voice that speaks.
-            speechConfig.SpeechSynthesisVoiceName = "en-US-JennyMultilingualNeural"; 
-            var audioOutputConfig = AudioConfig.FromDefaultSpeakerOutput();
-    
-            using (var speechSynthesizer = new SpeechSynthesizer(speechConfig, audioOutputConfig))
+            var responseStream = await client.GetCompletionsStreamingAsync(engine, completionsOptions);
+            using var streamingCompletions = responseStream.Value;
+            StringBuilder gptBuffer = new();
+            await foreach (var choice in streamingCompletions.GetChoicesStreaming())
             {
-                var speechSynthesisResult = await speechSynthesizer.SpeakTextAsync(text).ConfigureAwait(true);
-    
-                if (speechSynthesisResult.Reason == ResultReason.SynthesizingAudioCompleted)
+                await foreach (var message in choice.GetTextStreaming())
                 {
-                    Console.WriteLine($"Speech synthesized to speaker for text: [{text}]");
-                }
-                else if (speechSynthesisResult.Reason == ResultReason.Canceled)
-                {
-                    var cancellationDetails = SpeechSynthesisCancellationDetails.FromResult(speechSynthesisResult);
-                    Console.WriteLine($"Speech synthesis canceled: {cancellationDetails.Reason}");
-    
-                    if (cancellationDetails.Reason == CancellationReason.Error)
+                    if (string.IsNullOrEmpty(message))
+                    {
+                        continue;
+                    }
+        
+                    lock (consoleLock)
                     {
-                        Console.WriteLine($"Error details: {cancellationDetails.ErrorDetails}");
+                        Console.ForegroundColor = ConsoleColor.DarkBlue;
+                        Console.Write($"{message}");
+                        Console.ResetColor();
+                    }
+        
+                    gptBuffer.Append(message);
+                    
+                    if (sentenceSaperators.Any(message.Contains))
+                    {
+                        var sentence = gptBuffer.ToString().Trim();
+                        if (!string.IsNullOrEmpty(sentence))
+                        {
+                            await speechSynthesizer.SpeakTextAsync(sentence).ConfigureAwait(true);
+                            gptBuffer.Clear();
+                        }
                     }
                 }
             }
@@ -164,7 +189,7 @@ Follow these steps to create a new console application.
             }
         }
     }
-    ```
+```
 
 1. To increase or decrease the number of tokens returned by Azure OpenAI, change the `MaxTokens` property in the `CompletionsOptions` class instance. For more information about tokens and cost implications, see [Azure OpenAI tokens](/azure/ai-services/openai/overview#tokens) and [Azure OpenAI pricing](https://azure.microsoft.com/pricing/details/cognitive-services/openai-service/).
 
diff --git a/articles/ai-services/speech-service/includes/quickstarts/openai-speech/python.md b/articles/ai-services/speech-service/includes/quickstarts/openai-speech/python.md
@@ -48,7 +48,7 @@ Follow these steps to create a new console application.
 
 1. Copy the following code into `openai-speech.py`: 
 
-    ```Python
+```Python
     import os
     import azure.cognitiveservices.speech as speechsdk
     import openai
@@ -75,26 +75,30 @@ Follow these steps to create a new console application.
     # The language of the voice that responds on behalf of Azure OpenAI.
     speech_config.speech_synthesis_voice_name='en-US-JennyMultilingualNeural'
     speech_synthesizer = speechsdk.SpeechSynthesizer(speech_config=speech_config, audio_config=audio_output_config)
+
+    # Sentence-end marks used to split the response into sentences for text to speech
+    tts_sentence_end = [ ".", "!", "?", ";", "。", "！", "？", "；", "\n" ]
     
     # Prompts Azure OpenAI with a request and synthesizes the response.
     def ask_openai(prompt):
+        # Ask Azure OpenAI in streaming mode
+        response = openai.Completion.create(engine=deployment_id, prompt=prompt, max_tokens=200, stream=True)
+        collected_messages = []
+        last_tts_request = None
     
-        # Ask Azure OpenAI
-        response = openai.Completion.create(engine=deployment_id, prompt=prompt, max_tokens=100)
-        text = response['choices'][0]['text'].replace('\n', ' ').replace(' .', '.').strip()
-        print('Azure OpenAI response:' + text)
-        
-        # Azure text to speech output
-        speech_synthesis_result = speech_synthesizer.speak_text_async(text).get()
-    
-        # Check result
-        if speech_synthesis_result.reason == speechsdk.ResultReason.SynthesizingAudioCompleted:
-            print("Speech synthesized to speaker for text [{}]".format(text))
-        elif speech_synthesis_result.reason == speechsdk.ResultReason.Canceled:
-            cancellation_details = speech_synthesis_result.cancellation_details
-            print("Speech synthesis canceled: {}".format(cancellation_details.reason))
-            if cancellation_details.reason == speechsdk.CancellationReason.Error:
-                print("Error details: {}".format(cancellation_details.error_details))
+        # iterate through the streamed response
+        for chunk in response:
+            if len(chunk['choices']) > 0:
+                chunk_message = chunk['choices'][0]['text']  # extract the message
+                collected_messages.append(chunk_message)  # save the message
+                if chunk_message in tts_sentence_end: # sentence end found
+                    text = ''.join(collected_messages).strip() # join the received messages together to build a sentence
+                    if text != '': # skip sentences that contain only newlines or spaces
+                        print(f"Speech synthesized to speaker for: {text}")
+                        last_tts_request = speech_synthesizer.speak_text_async(text)
+                        collected_messages.clear()
+        if last_tts_request:
+            last_tts_request.get()
     
     # Continuously listens for speech input to recognize and send as text to Azure OpenAI
     def chat_with_open_ai():
@@ -128,7 +132,8 @@ Follow these steps to create a new console application.
         chat_with_open_ai()
     except Exception as err:
         print("Encountered exception. {}".format(err))
-    ```
+```
+
 1. To increase or decrease the number of tokens returned by Azure OpenAI, change the `max_tokens` parameter. For more information about tokens and cost implications, see [Azure OpenAI tokens](/azure/ai-services/openai/overview#tokens) and [Azure OpenAI pricing](https://azure.microsoft.com/pricing/details/cognitive-services/openai-service/).
 
 Run your new console application to start speech recognition from a microphone: