Skip to content

Commit 7d33435

Browse files
Merge pull request #234598 from eric-urban/eur/speech-openai
add csharp sample for speech and openai
2 parents 04db176 + 873ff28 commit 7d33435

File tree

5 files changed

+230
-8
lines changed

5 files changed

+230
-8
lines changed
Lines changed: 208 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,208 @@
1+
---
2+
author: eric-urban
3+
ms.service: cognitive-services
4+
ms.topic: include
5+
ms.date: 04/15/2023
6+
ms.author: eur
7+
---
8+
9+
[!INCLUDE [Header](../../common/csharp.md)]
10+
11+
[!INCLUDE [Introduction](intro.md)]
12+
13+
## Prerequisites
14+
15+
[!INCLUDE [Prerequisites](../../common/azure-prerequisites.md)]
16+
17+
## Set up the environment
18+
The Speech SDK is available as a [NuGet package](https://www.nuget.org/packages/Microsoft.CognitiveServices.Speech) and implements .NET Standard 2.0. You install the Speech SDK later in this guide, but first check the [SDK installation guide](../../../quickstarts/setup-platform.md?pivots=programming-language-csharp) for any more requirements.
19+
20+
### Set environment variables
21+
22+
This example requires environment variables named `OPEN_AI_KEY`, `OPEN_AI_ENDPOINT`, `SPEECH_KEY`, and `SPEECH_REGION`.
23+
24+
[!INCLUDE [Environment variables](../../common/environment-variables-openai.md)]
25+
26+
## Recognize speech from a microphone
27+
28+
Follow these steps to create a new console application.
29+
30+
1. Open a command prompt where you want the new project, and create a console application with the .NET CLI. The `Program.cs` file should be created in the project directory.
31+
```dotnetcli
32+
dotnet new console
33+
```
34+
1. Install the Speech SDK in your new project with the .NET CLI.
35+
```dotnetcli
36+
dotnet add package Microsoft.CognitiveServices.Speech
37+
```
38+
1. Install the Azure OpenAI SDK (prerelease) in your new project with the .NET CLI.
39+
```dotnetcli
40+
dotnet add package Azure.AI.OpenAI --prerelease
41+
```
42+
1. Replace the contents of `Program.cs` with the following code.
43+
44+
```csharp
45+
using System;
46+
using System.IO;
47+
using System.Threading.Tasks;
48+
using Microsoft.CognitiveServices.Speech;
49+
using Microsoft.CognitiveServices.Speech.Audio;
50+
using Azure;
51+
using Azure.AI.OpenAI;
52+
using static System.Environment;
53+
54+
/// <summary>
/// Console sample that holds a spoken conversation with Azure OpenAI:
/// speech from the default microphone is recognized as text, the text is sent
/// to Azure OpenAI as a completion prompt, and the text response is synthesized
/// to the default speaker.
/// </summary>
class Program
{
    // This example requires environment variables named "OPEN_AI_KEY" and "OPEN_AI_ENDPOINT"
    // Your endpoint should look like the following https://YOUR_OPEN_AI_RESOURCE_NAME.openai.azure.com/
    static string openAIKey = Environment.GetEnvironmentVariable("OPEN_AI_KEY");
    static string openAIEndpoint = Environment.GetEnvironmentVariable("OPEN_AI_ENDPOINT");

    // Enter the deployment name you chose when you deployed the model.
    static string engine = "text-davinci-002";

    // This example requires environment variables named "SPEECH_KEY" and "SPEECH_REGION"
    static string speechKey = Environment.GetEnvironmentVariable("SPEECH_KEY");
    static string speechRegion = Environment.GetEnvironmentVariable("SPEECH_REGION");

    /// <summary>
    /// Verifies that every environment variable the sample reads has a value.
    /// </summary>
    /// <exception cref="InvalidOperationException">
    /// A required environment variable is missing or blank.
    /// </exception>
    static void EnsureEnvironmentVariables()
    {
        var required = new (string Name, string Value)[]
        {
            ("OPEN_AI_KEY", openAIKey),
            ("OPEN_AI_ENDPOINT", openAIEndpoint),
            ("SPEECH_KEY", speechKey),
            ("SPEECH_REGION", speechRegion),
        };

        foreach (var (name, value) in required)
        {
            if (string.IsNullOrWhiteSpace(value))
            {
                // Fail here with the variable's name rather than later with an
                // argument exception that doesn't say which setting is missing.
                throw new InvalidOperationException($"The {name} environment variable is not set.");
            }
        }
    }

    /// <summary>
    /// Returns true when the recognized phrase is the word "stop", ignoring case,
    /// surrounding whitespace, and trailing sentence punctuation.
    /// </summary>
    /// <param name="recognizedText">Text produced by speech recognition.</param>
    static bool IsStopCommand(string recognizedText)
    {
        var normalized = (recognizedText ?? string.Empty).Trim().TrimEnd('.', '!', '?');
        return normalized.Equals("Stop", StringComparison.OrdinalIgnoreCase);
    }

    /// <summary>
    /// Prompts Azure OpenAI with a request and synthesizes the response to the default speaker.
    /// </summary>
    /// <param name="prompt">The text to send to Azure OpenAI as the completion prompt.</param>
    static async Task AskOpenAI(string prompt)
    {
        // Ask Azure OpenAI
        OpenAIClient client = new(new Uri(openAIEndpoint), new AzureKeyCredential(openAIKey));
        var completionsOptions = new CompletionsOptions()
        {
            Prompts = { prompt },
            MaxTokens = 100,
        };

        // Await the asynchronous overload so the request doesn't block the calling thread.
        Response<Completions> completionsResponse = await client.GetCompletionsAsync(engine, completionsOptions);

        // Guard the index below: an empty result would otherwise throw.
        if (completionsResponse.Value.Choices.Count == 0)
        {
            Console.WriteLine("Azure OpenAI returned no completions.");
            return;
        }

        string text = completionsResponse.Value.Choices[0].Text.Trim();
        Console.WriteLine($"Azure OpenAI response: {text}");

        var speechConfig = SpeechConfig.FromSubscription(speechKey, speechRegion);
        // The language of the voice that speaks.
        speechConfig.SpeechSynthesisVoiceName = "en-US-JennyMultilingualNeural";

        // Dispose the audio output along with the synthesizer once the response has been spoken.
        using var audioOutputConfig = AudioConfig.FromDefaultSpeakerOutput();
        using var speechSynthesizer = new SpeechSynthesizer(speechConfig, audioOutputConfig);

        var speechSynthesisResult = await speechSynthesizer.SpeakTextAsync(text);

        if (speechSynthesisResult.Reason == ResultReason.SynthesizingAudioCompleted)
        {
            Console.WriteLine($"Speech synthesized to speaker for text: [{text}]");
        }
        else if (speechSynthesisResult.Reason == ResultReason.Canceled)
        {
            var cancellationDetails = SpeechSynthesisCancellationDetails.FromResult(speechSynthesisResult);
            Console.WriteLine($"Speech synthesis canceled: {cancellationDetails.Reason}");

            if (cancellationDetails.Reason == CancellationReason.Error)
            {
                Console.WriteLine($"Error details: {cancellationDetails.ErrorDetails}");
            }
        }
    }

    /// <summary>
    /// Continuously listens for speech input to recognize and send as text to Azure OpenAI.
    /// The loop ends when the recognized phrase is the stop command.
    /// </summary>
    static async Task ChatWithOpenAI()
    {
        var speechConfig = SpeechConfig.FromSubscription(speechKey, speechRegion);
        // Should be the locale for the speaker's language.
        speechConfig.SpeechRecognitionLanguage = "en-US";

        using var audioConfig = AudioConfig.FromDefaultMicrophoneInput();
        using var speechRecognizer = new SpeechRecognizer(speechConfig, audioConfig);
        var conversationEnded = false;

        while (!conversationEnded)
        {
            Console.WriteLine("Azure OpenAI is listening. Say 'Stop' or press Ctrl-Z to end the conversation.");

            // Get audio from the microphone and recognize it as text (speech to text).
            var speechRecognitionResult = await speechRecognizer.RecognizeOnceAsync();

            switch (speechRecognitionResult.Reason)
            {
                case ResultReason.RecognizedSpeech:
                    if (IsStopCommand(speechRecognitionResult.Text))
                    {
                        Console.WriteLine("Conversation ended.");
                        conversationEnded = true;
                    }
                    else
                    {
                        Console.WriteLine($"Recognized speech: {speechRecognitionResult.Text}");
                        await AskOpenAI(speechRecognitionResult.Text);
                    }
                    break;
                case ResultReason.NoMatch:
                    Console.WriteLine("No speech could be recognized.");
                    break;
                case ResultReason.Canceled:
                    var cancellationDetails = CancellationDetails.FromResult(speechRecognitionResult);
                    Console.WriteLine($"Speech Recognition canceled: {cancellationDetails.Reason}");
                    if (cancellationDetails.Reason == CancellationReason.Error)
                    {
                        Console.WriteLine($"Error details={cancellationDetails.ErrorDetails}");
                    }
                    break;
            }
        }
    }

    /// <summary>
    /// Entry point: checks configuration, runs the conversation loop, and prints
    /// the message of any exception instead of letting it crash the process.
    /// </summary>
    /// <param name="args">Command-line arguments (unused).</param>
    static async Task Main(string[] args)
    {
        try
        {
            EnsureEnvironmentVariables();
            await ChatWithOpenAI();
        }
        catch (Exception ex)
        {
            Console.WriteLine(ex.Message);
        }
    }
}
167+
```
168+
169+
1. To increase or decrease the number of tokens returned by Azure OpenAI, change the `MaxTokens` property in the `CompletionsOptions` class instance. For more information about tokens and cost implications, see [Azure OpenAI tokens](/azure/cognitive-services/openai/overview#tokens) and [Azure OpenAI pricing](https://azure.microsoft.com/pricing/details/cognitive-services/openai-service/).
170+
171+
Run your new console application to start speech recognition from a microphone:
172+
173+
```console
174+
dotnet run
175+
```
176+
177+
> [!IMPORTANT]
178+
> Make sure that you set the `OPEN_AI_KEY`, `OPEN_AI_ENDPOINT`, `SPEECH_KEY` and `SPEECH_REGION` environment variables as described [previously](#set-environment-variables). If you don't set these variables, the sample will fail with an error message.
179+
180+
Speak into your microphone when prompted. The console output includes the prompt for you to begin speaking, then your request as text, and then the response from Azure OpenAI as text. The response from Azure OpenAI should be converted from text to speech and then output to the default speaker.
181+
182+
```console
183+
PS C:\dev\openai\csharp> dotnet run
184+
Azure OpenAI is listening. Say 'Stop' or press Ctrl-Z to end the conversation.
185+
Recognized speech: Make a comma separated list of all continents.
186+
Azure OpenAI response: Africa, Antarctica, Asia, Australia, Europe, North America, South America
187+
Speech synthesized to speaker for text: [Africa, Antarctica, Asia, Australia, Europe, North America, South America]
188+
Azure OpenAI is listening. Say 'Stop' or press Ctrl-Z to end the conversation.
189+
Recognized speech: Make a comma separated list of 1 Astronomical observatory for each continent. A list should include each continent name in parentheses.
190+
Azure OpenAI response: Mauna Kea Observatories (North America), La Silla Observatory (South America), Tenerife Observatory (Europe), Siding Spring Observatory (Australia), Beijing Xinglong Observatory (Asia), Naukluft Plateau Observatory (Africa), Rutherford Appleton Laboratory (Antarctica)
191+
Speech synthesized to speaker for text: [Mauna Kea Observatories (North America), La Silla Observatory (South America), Tenerife Observatory (Europe), Siding Spring Observatory (Australia), Beijing Xinglong Observatory (Asia), Naukluft Plateau Observatory (Africa), Rutherford Appleton Laboratory (Antarctica)]
192+
Azure OpenAI is listening. Say 'Stop' or press Ctrl-Z to end the conversation.
193+
Conversation ended.
194+
PS C:\dev\openai\csharp>
195+
```
196+
197+
## Remarks
198+
Now that you've completed the quickstart, here are some more considerations:
199+
200+
- To change the speech recognition language, replace `en-US` with another [supported language](~/articles/cognitive-services/speech-service/supported-languages.md). For example, `es-ES` for Spanish (Spain). The default language is `en-US` if you don't specify a language. For details about how to identify one of multiple languages that might be spoken, see [language identification](~/articles/cognitive-services/speech-service/language-identification.md).
201+
- To change the voice that you hear, replace `en-US-JennyMultilingualNeural` with another [supported voice](~/articles/cognitive-services/speech-service/supported-languages.md#prebuilt-neural-voices). If the voice doesn't speak the language of the text returned from Azure OpenAI, the Speech service doesn't output synthesized audio.
202+
- To use a different [model](/azure/cognitive-services/openai/concepts/models#model-summary-table-and-region-availability), replace `text-davinci-002` with the ID of another [deployment](/azure/cognitive-services/openai/how-to/create-resource#deploy-a-model). Keep in mind that the deployment ID isn't necessarily the same as the model name. You named your deployment when you created it in [Azure OpenAI Studio](https://oai.azure.com/).
203+
- Azure OpenAI also performs content moderation on the prompt inputs and generated outputs. The prompts or responses may be filtered if harmful content is detected. For more information, see the [content filtering](/azure/cognitive-services/openai/concepts/content-filter) article.
204+
205+
## Clean up resources
206+
207+
[!INCLUDE [Delete resource](../../common/delete-resource.md)]
208+

articles/cognitive-services/Speech-Service/includes/quickstarts/openai-speech/intro.md

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,11 +9,11 @@ ms.author: eur
99
> [!IMPORTANT]
1010
> To complete the steps in this guide, access must be granted to Microsoft Azure OpenAI Service in the desired Azure subscription. Currently, access to this service is granted only by application. You can apply for access to Azure OpenAI by completing the form at [https://aka.ms/oai/access](https://aka.ms/oai/access).
1111
12-
In this how-to guide, you can use [Speech](../../../overview.md) to converse with [Azure OpenAI](/azure/cognitive-services/openai/overview). The text recognized by the Speech service is sent to Azure OpenAI. The text response from Azure OpenAI is then synthesized by the Speech service.
12+
In this how-to guide, you can use [Azure Cognitive Services Speech](../../../overview.md) to converse with [Azure OpenAI Service](/azure/cognitive-services/openai/overview). The text recognized by the Speech service is sent to Azure OpenAI. The text response from Azure OpenAI is then synthesized by the Speech service.
1313

1414
Speak into the microphone to start a conversation with Azure OpenAI.
15-
- Azure Cognitive Services Speech recognizes your speech and converts it into text (speech-to-text).
15+
- The Speech service recognizes your speech and converts it into text (speech to text).
1616
- Your request as text is sent to Azure OpenAI.
17-
- Azure Cognitive Services Speech synthesizes (text-to-speech) the response from Azure OpenAI to the default speaker.
17+
- The Speech service text to speech (TTS) feature synthesizes the response from Azure OpenAI to the default speaker.
1818

1919
Although the experience of this example is a back-and-forth exchange, Azure OpenAI doesn't remember the context of your conversation.

articles/cognitive-services/Speech-Service/includes/quickstarts/openai-speech/python.md

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -70,11 +70,11 @@ Follow these steps to create a new console application.
7070
7171
# Should be the locale for the speaker's language.
7272
speech_config.speech_recognition_language="en-US"
73-
speech_recognizer = speechsdk.SpeechRecognizer(speech_config=speech_config, audio_config=audio_output_config)
73+
speech_recognizer = speechsdk.SpeechRecognizer(speech_config=speech_config, audio_config=audio_config)
7474
7575
# The language of the voice that responds on behalf of Azure OpenAI.
7676
speech_config.speech_synthesis_voice_name='en-US-JennyMultilingualNeural'
77-
speech_synthesizer = speechsdk.SpeechSynthesizer(speech_config=speech_config, audio_config=audio_config)
77+
speech_synthesizer = speechsdk.SpeechSynthesizer(speech_config=speech_config, audio_config=audio_output_config)
7878
7979
# Prompts Azure OpenAI with a request and synthesizes the response.
8080
def ask_openai(prompt):
@@ -119,7 +119,6 @@ Follow these steps to create a new console application.
119119
print("Speech Recognition canceled: {}".format(cancellation_details.reason))
120120
if cancellation_details.reason == speechsdk.CancellationReason.Error:
121121
print("Error details: {}".format(cancellation_details.error_details))
122-
print("Did you set the speech resource key and region values?")
123122
except EOFError:
124123
break
125124

articles/cognitive-services/Speech-Service/openai-speech.md

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,15 +8,21 @@ manager: nitinme
88
ms.service: cognitive-services
99
ms.subservice: speech-service
1010
ms.topic: how-to
11-
ms.date: 03/07/2023
11+
ms.date: 04/15/2023
1212
ms.author: eur
13-
ms.devlang: python
13+
zone_pivot_groups: programming-languages-csharp-python
1414
keywords: speech to text, openai
1515
---
1616

1717
# Azure OpenAI speech to speech chat
1818

19+
::: zone pivot="programming-language-csharp"
20+
[!INCLUDE [C# include](./includes/quickstarts/openai-speech/csharp.md)]
21+
::: zone-end
22+
23+
::: zone pivot="programming-language-python"
1924
[!INCLUDE [Python include](./includes/quickstarts/openai-speech/python.md)]
25+
::: zone-end
2026

2127
## Next steps
2228

articles/zone-pivot-groups.yml

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -719,6 +719,15 @@ groups:
719719
title: Python
720720
- id: programming-language-swift
721721
title: Swift
722+
- id: programming-languages-csharp-python
723+
# Owner: eur
724+
title: Programming languages
725+
prompt: Choose a programming language
726+
pivots:
727+
- id: programming-language-csharp
728+
title: C#
729+
- id: programming-language-python
730+
title: Python
722731
- id: programming-languages-speech-sdk-cli
723732
# Owner: eur
724733
title: Programming languages

0 commit comments

Comments
 (0)