Skip to content

Commit 059970d

Browse files
authored
Merge pull request #256579 from niuzheng168/master
Update openai-speech sample code
2 parents 3ed6e99 + c7ad006 commit 059970d

File tree

2 files changed

+73
-43
lines changed

2 files changed

+73
-43
lines changed

articles/ai-services/speech-service/includes/quickstarts/openai-speech/csharp.md

Lines changed: 50 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ Follow these steps to create a new console application.
4141
```
4242
1. Replace the contents of `Program.cs` with the following code.
4343
44-
```csharp
44+
```csharp
4545
using System;
4646
using System.IO;
4747
using System.Threading.Tasks;
@@ -64,42 +64,67 @@ Follow these steps to create a new console application.
6464
// This example requires environment variables named "SPEECH_KEY" and "SPEECH_REGION"
6565
static string speechKey = Environment.GetEnvironmentVariable("SPEECH_KEY");
6666
static string speechRegion = Environment.GetEnvironmentVariable("SPEECH_REGION");
67-
67+
68+
// Sentence end symbols for splitting the response into sentences.
69+
static List<string> sentenceSaperators = new() { ".", "!", "?", ";", "。", "!", "?", ";", "\n" };
70+
71+
private static object consoleLock = new();
72+
6873
// Prompts Azure OpenAI with a request and synthesizes the response.
6974
async static Task AskOpenAI(string prompt)
7075
{
76+
var speechConfig = SpeechConfig.FromSubscription(speechKey, speechRegion);
77+
// The language of the voice that speaks.
78+
speechConfig.SpeechSynthesisVoiceName = "en-US-JennyMultilingualNeural";
79+
var audioOutputConfig = AudioConfig.FromDefaultSpeakerOutput();
80+
using var speechSynthesizer = new SpeechSynthesizer(speechConfig, audioOutputConfig);
81+
speechSynthesizer.Synthesizing += (sender, args) =>
82+
{
83+
lock (consoleLock)
84+
{
85+
Console.ForegroundColor = ConsoleColor.Yellow;
86+
Console.Write($"[Audio]");
87+
Console.ResetColor();
88+
}
89+
};
90+
7191
// Ask Azure OpenAI
7292
OpenAIClient client = new(new Uri(openAIEndpoint), new AzureKeyCredential(openAIKey));
7393
var completionsOptions = new CompletionsOptions()
7494
{
7595
Prompts = { prompt },
7696
MaxTokens = 100,
97+
7798
};
78-
Response<Completions> completionsResponse = client.GetCompletions(engine, completionsOptions);
79-
string text = completionsResponse.Value.Choices[0].Text.Trim();
80-
Console.WriteLine($"Azure OpenAI response: {text}");
81-
82-
var speechConfig = SpeechConfig.FromSubscription(speechKey, speechRegion);
83-
// The language of the voice that speaks.
84-
speechConfig.SpeechSynthesisVoiceName = "en-US-JennyMultilingualNeural";
85-
var audioOutputConfig = AudioConfig.FromDefaultSpeakerOutput();
86-
87-
using (var speechSynthesizer = new SpeechSynthesizer(speechConfig, audioOutputConfig))
99+
var responseStream = await client.GetCompletionsStreamingAsync(engine, completionsOptions);
100+
using var streamingCompletions = responseStream.Value;
101+
StringBuilder gptBuffer = new();
102+
await foreach (var choice in streamingCompletions.GetChoicesStreaming())
88103
{
89-
var speechSynthesisResult = await speechSynthesizer.SpeakTextAsync(text).ConfigureAwait(true);
90-
91-
if (speechSynthesisResult.Reason == ResultReason.SynthesizingAudioCompleted)
104+
await foreach (var message in choice.GetTextStreaming())
92105
{
93-
Console.WriteLine($"Speech synthesized to speaker for text: [{text}]");
94-
}
95-
else if (speechSynthesisResult.Reason == ResultReason.Canceled)
96-
{
97-
var cancellationDetails = SpeechSynthesisCancellationDetails.FromResult(speechSynthesisResult);
98-
Console.WriteLine($"Speech synthesis canceled: {cancellationDetails.Reason}");
99-
100-
if (cancellationDetails.Reason == CancellationReason.Error)
106+
if (string.IsNullOrEmpty(message))
107+
{
108+
continue;
109+
}
110+
111+
lock (consoleLock)
101112
{
102-
Console.WriteLine($"Error details: {cancellationDetails.ErrorDetails}");
113+
Console.ForegroundColor = ConsoleColor.DarkBlue;
114+
Console.Write($"{message}");
115+
Console.ResetColor();
116+
}
117+
118+
gptBuffer.Append(message);
119+
120+
if (sentenceSaperators.Any(message.Contains))
121+
{
122+
var sentence = gptBuffer.ToString().Trim();
123+
if (!string.IsNullOrEmpty(sentence))
124+
{
125+
await speechSynthesizer.SpeakTextAsync(sentence).ConfigureAwait(true);
126+
gptBuffer.Clear();
127+
}
103128
}
104129
}
105130
}
@@ -164,7 +189,7 @@ Follow these steps to create a new console application.
164189
}
165190
}
166191
}
167-
```
192+
```
168193

169194
1. To increase or decrease the number of tokens returned by Azure OpenAI, change the `MaxTokens` property in the `CompletionsOptions` class instance. For more information about tokens and cost implications, see [Azure OpenAI tokens](/azure/ai-services/openai/overview#tokens) and [Azure OpenAI pricing](https://azure.microsoft.com/pricing/details/cognitive-services/openai-service/).
170195

articles/ai-services/speech-service/includes/quickstarts/openai-speech/python.md

Lines changed: 23 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@ Follow these steps to create a new console application.
4848

4949
1. Copy the following code into `openai-speech.py`:
5050

51-
```Python
51+
```Python
5252
import os
5353
import azure.cognitiveservices.speech as speechsdk
5454
import openai
@@ -75,26 +75,30 @@ Follow these steps to create a new console application.
7575
# The language of the voice that responds on behalf of Azure OpenAI.
7676
speech_config.speech_synthesis_voice_name='en-US-JennyMultilingualNeural'
7777
speech_synthesizer = speechsdk.SpeechSynthesizer(speech_config=speech_config, audio_config=audio_output_config)
78+
79+
# tts sentence end mark
80+
tts_sentence_end = [ ".", "!", "?", ";", "。", "!", "?", ";", "\n" ]
7881
7982
# Prompts Azure OpenAI with a request and synthesizes the response.
8083
def ask_openai(prompt):
84+
# Ask Azure OpenAI in a streaming way
85+
response = openai.Completion.create(engine=deployment_id, prompt=prompt, max_tokens=200, stream=True)
86+
collected_messages = []
87+
last_tts_request = None
8188
82-
# Ask Azure OpenAI
83-
response = openai.Completion.create(engine=deployment_id, prompt=prompt, max_tokens=100)
84-
text = response['choices'][0]['text'].replace('\n', ' ').replace(' .', '.').strip()
85-
print('Azure OpenAI response:' + text)
86-
87-
# Azure text to speech output
88-
speech_synthesis_result = speech_synthesizer.speak_text_async(text).get()
89-
90-
# Check result
91-
if speech_synthesis_result.reason == speechsdk.ResultReason.SynthesizingAudioCompleted:
92-
print("Speech synthesized to speaker for text [{}]".format(text))
93-
elif speech_synthesis_result.reason == speechsdk.ResultReason.Canceled:
94-
cancellation_details = speech_synthesis_result.cancellation_details
95-
print("Speech synthesis canceled: {}".format(cancellation_details.reason))
96-
if cancellation_details.reason == speechsdk.CancellationReason.Error:
97-
print("Error details: {}".format(cancellation_details.error_details))
89+
# iterate through the streamed response
90+
for chunk in response:
91+
if len(chunk['choices']) > 0:
92+
chunk_message = chunk['choices'][0]['text'] # extract the message
93+
collected_messages.append(chunk_message) # save the message
94+
if chunk_message in tts_sentence_end: # sentence end found
95+
text = ''.join(collected_messages).strip() # join the received messages together to build a sentence
96+
if text != '': # if the sentence contains only \n or spaces, we can skip it
97+
print(f"Speech synthesized to speaker for: {text}")
98+
last_tts_request = speech_synthesizer.speak_text_async(text)
99+
collected_messages.clear()
100+
if last_tts_request:
101+
last_tts_request.get()
98102
99103
# Continuously listens for speech input to recognize and send as text to Azure OpenAI
100104
def chat_with_open_ai():
@@ -128,7 +132,8 @@ Follow these steps to create a new console application.
128132
chat_with_open_ai()
129133
except Exception as err:
130134
print("Encountered exception. {}".format(err))
131-
```
135+
```
136+
132137
1. To increase or decrease the number of tokens returned by Azure OpenAI, change the `max_tokens` parameter. For more information about tokens and cost implications, see [Azure OpenAI tokens](/azure/ai-services/openai/overview#tokens) and [Azure OpenAI pricing](https://azure.microsoft.com/pricing/details/cognitive-services/openai-service/).
133138

134139
Run your new console application to start speech recognition from a microphone:

0 commit comments

Comments
 (0)