Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
133 changes: 64 additions & 69 deletions dotnet/samples/console-from-mic/Program.cs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
using Azure.AI.OpenAI;
using Azure.Identity;
using OpenAI;
using OpenAI.RealtimeConversation;
using OpenAI.Realtime;
using System.ClientModel;

#pragma warning disable OPENAI002
Expand All @@ -12,21 +12,22 @@ public static async Task Main(string[] args)
{
// First, we create a client according to configured environment variables (see end of file) and then start
// a new conversation session.
RealtimeConversationClient client = GetConfiguredClient();
using RealtimeConversationSession session = await client.StartConversationSessionAsync();
OpenAIClient topLevelServiceClient = GetTopLevelServiceClient();
RealtimeClient realtimeClient = topLevelServiceClient.GetRealtimeClient();

using RealtimeSession session = await realtimeClient.StartConversationSessionAsync(GetModelOrDeploymentName());

// We'll add a simple function tool that enables the model to interpret user input to figure out when it
// might be a good time to stop the interaction.
ConversationFunctionTool finishConversationTool = new()
ConversationFunctionTool finishConversationTool = new("user_wants_to_finish_conversation")
{
Name = "user_wants_to_finish_conversation",
Description = "Invoked when the user says goodbye, expresses being finished, or otherwise seems to want to stop the interaction.",
Parameters = BinaryData.FromString("{}")
};

// Now we configure the session using the tool we created along with transcription options that enable input
// audio transcription with whisper.
await session.ConfigureSessionAsync(new ConversationSessionOptions()
await session.ConfigureConversationSessionAsync(new ConversationSessionOptions()
{
Tools = { finishConversationTool },
InputTranscriptionOptions = new()
Expand All @@ -39,7 +40,7 @@ await session.ConfigureSessionAsync(new ConversationSessionOptions()
SpeakerOutput speakerOutput = new();

// With the session configured, we start processing commands received from the service.
await foreach (ConversationUpdate update in session.ReceiveUpdatesAsync())
await foreach (RealtimeUpdate update in session.ReceiveUpdatesAsync())
{
// session.created is the very first command on a session and lets us know that connection was successful.
if (update is ConversationSessionStartedUpdate)
Expand All @@ -60,7 +61,7 @@ await session.ConfigureSessionAsync(new ConversationSessionOptions()

// input_audio_buffer.speech_started tells us that the beginning of speech was detected in the input audio
// we're sending from the microphone.
if (update is ConversationInputSpeechStartedUpdate speechStartedUpdate)
if (update is InputAudioSpeechStartedUpdate speechStartedUpdate)
{
Console.WriteLine($" <<< Start of speech detected @ {speechStartedUpdate.AudioStartTime}");
// Like any good listener, we can use the cue that the user started speaking as a hint that the app
Expand All @@ -71,42 +72,51 @@ await session.ConfigureSessionAsync(new ConversationSessionOptions()

// input_audio_buffer.speech_stopped tells us that the end of speech was detected in the input audio sent
// from the microphone. It'll automatically tell the model to start generating a response to reply back.
if (update is ConversationInputSpeechFinishedUpdate speechFinishedUpdate)
if (update is InputAudioSpeechFinishedUpdate speechFinishedUpdate)
{
Console.WriteLine($" <<< End of speech detected @ {speechFinishedUpdate.AudioEndTime}");
}

// conversation.item.input_audio_transcription.completed will only arrive if input transcription was
// configured for the session. It provides a written representation of what the user said, which can
// provide good feedback about what the model will use to respond.
if (update is ConversationInputTranscriptionFinishedUpdate transcriptionFinishedUpdate)
if (update is InputAudioTranscriptionFinishedUpdate transcriptionFinishedUpdate)
{
Console.WriteLine($" >>> USER: {transcriptionFinishedUpdate.Transcript}");
}

// Item streaming delta updates provide a combined view into incremental item data including output
// the audio response transcript, function arguments, and audio data.
if (update is ConversationItemStreamingPartDeltaUpdate deltaUpdate)
if (update is InputAudioTranscriptionDeltaUpdate deltaUpdate)
{
Console.Write(deltaUpdate.Delta);
}

if (update is OutputDeltaUpdate outputDeltaUpdate)
{
Console.Write(deltaUpdate.AudioTranscript);
Console.Write(deltaUpdate.Text);
speakerOutput.EnqueueForPlayback(deltaUpdate.AudioBytes);
if (!string.IsNullOrEmpty(outputDeltaUpdate.Text))
{
// If the model generates text, we print it to the console.
Console.Write(outputDeltaUpdate.Text);
}
if (outputDeltaUpdate.AudioBytes is not null)
{
speakerOutput.EnqueueForPlayback(outputDeltaUpdate.AudioBytes);
}
}

// response.output_item.done tells us that a model-generated item with streaming content is completed.
// That's a good signal to provide a visual break and perform final evaluation of tool calls.
if (update is ConversationItemStreamingFinishedUpdate itemFinishedUpdate)
if (update is OutputStreamingFinishedUpdate streamingFinishedUpdate)
{
Console.WriteLine();
if (itemFinishedUpdate.FunctionName == finishConversationTool.Name)
if (streamingFinishedUpdate.FunctionName == finishConversationTool.Name)
{
Console.WriteLine($" <<< Finish tool invoked -- ending conversation!");
break;
}
}

// error commands, as the name implies, are raised when something goes wrong.
if (update is ConversationErrorUpdate errorUpdate)
if (update is RealtimeErrorUpdate errorUpdate)
{
Console.WriteLine();
Console.WriteLine();
Expand All @@ -117,30 +127,36 @@ await session.ConfigureSessionAsync(new ConversationSessionOptions()
}
}

private static RealtimeConversationClient GetConfiguredClient()
private static OpenAIClient GetTopLevelServiceClient()
{
string? aoaiEndpoint = Environment.GetEnvironmentVariable("AZURE_OPENAI_ENDPOINT");
string? aoaiUseEntra = Environment.GetEnvironmentVariable("AZURE_OPENAI_USE_ENTRA");
string? aoaiDeployment = Environment.GetEnvironmentVariable("AZURE_OPENAI_DEPLOYMENT");
string? aoaiApiKey = Environment.GetEnvironmentVariable("AZURE_OPENAI_API_KEY");
string? oaiApiKey = Environment.GetEnvironmentVariable("OPENAI_API_KEY");

if (aoaiEndpoint is not null && bool.TryParse(aoaiUseEntra, out bool useEntra) && useEntra)
if (!string.IsNullOrEmpty(aoaiEndpoint))
{
return GetConfiguredClientForAzureOpenAIWithEntra(aoaiEndpoint, aoaiDeployment);
}
else if (aoaiEndpoint is not null && aoaiApiKey is not null)
{
return GetConfiguredClientForAzureOpenAIWithKey(aoaiEndpoint, aoaiDeployment, aoaiApiKey);
}
else if (aoaiEndpoint is not null)
{
throw new InvalidOperationException(
$"AZURE_OPENAI_ENDPOINT configured without AZURE_OPENAI_USE_ENTRA=true or AZURE_OPENAI_API_KEY.");
Console.WriteLine($"AZURE_OPENAI_ENDPOINT is defined: configuring AzureOpenAIClient with endpoint: {aoaiEndpoint}");
if (bool.TryParse(aoaiUseEntra, out bool useEntra) && useEntra)
{
Console.WriteLine($"AZURE_OPENAI_USE_ENTRA={aoaiUseEntra}: Using Entra token-based authentication");
return new AzureOpenAIClient(new Uri(aoaiEndpoint), new DefaultAzureCredential());
}
else if (!string.IsNullOrEmpty(aoaiApiKey))
{
Console.WriteLine($"Using API key (AZURE_OPENAI_API_KEY): {aoaiApiKey[..5]}**");
return new AzureOpenAIClient(new Uri(aoaiEndpoint), new ApiKeyCredential(aoaiApiKey));
}
else
{
throw new InvalidOperationException(
$"AZURE_OPENAI_ENDPOINT configured without AZURE_OPENAI_USE_ENTRA=true or AZURE_OPENAI_API_KEY.");
}
}
else if (oaiApiKey is not null)
else if (!string.IsNullOrEmpty(oaiApiKey))
{
return GetConfiguredClientForOpenAIWithKey(oaiApiKey);
Console.WriteLine($"OPENAI_API_KEY is defined: configuring OpenAIClient with API key: {oaiApiKey[..5]}**");
return new OpenAIClient(new ApiKeyCredential(oaiApiKey));
}
else
{
Expand All @@ -151,42 +167,21 @@ private static RealtimeConversationClient GetConfiguredClient()
}
}

private static RealtimeConversationClient GetConfiguredClientForAzureOpenAIWithEntra(
string aoaiEndpoint,
string? aoaiDeployment)
{
Console.WriteLine($" * Connecting to Azure OpenAI endpoint (AZURE_OPENAI_ENDPOINT): {aoaiEndpoint}");
Console.WriteLine($" * Using Entra token-based authentication (AZURE_OPENAI_USE_ENTRA)");
Console.WriteLine(string.IsNullOrEmpty(aoaiDeployment)
? $" * Using no deployment (AZURE_OPENAI_DEPLOYMENT)"
: $" * Using deployment (AZURE_OPENAI_DEPLOYMENT): {aoaiDeployment}");

AzureOpenAIClient aoaiClient = new(new Uri(aoaiEndpoint), new DefaultAzureCredential());
return aoaiClient.GetRealtimeConversationClient(aoaiDeployment);
}

private static RealtimeConversationClient GetConfiguredClientForAzureOpenAIWithKey(
string aoaiEndpoint,
string? aoaiDeployment,
string aoaiApiKey)
private static string GetModelOrDeploymentName()
{
Console.WriteLine($" * Connecting to Azure OpenAI endpoint (AZURE_OPENAI_ENDPOINT): {aoaiEndpoint}");
Console.WriteLine($" * Using API key (AZURE_OPENAI_API_KEY): {aoaiApiKey[..5]}**");
Console.WriteLine(string.IsNullOrEmpty(aoaiDeployment)
? $" * Using no deployment (AZURE_OPENAI_DEPLOYMENT)"
: $" * Using deployment (AZURE_OPENAI_DEPLOYMENT): {aoaiDeployment}");

AzureOpenAIClient aoaiClient = new(new Uri(aoaiEndpoint), new ApiKeyCredential(aoaiApiKey));
return aoaiClient.GetRealtimeConversationClient(aoaiDeployment);
}

private static RealtimeConversationClient GetConfiguredClientForOpenAIWithKey(string oaiApiKey)
{
string oaiEndpoint = "https://api.openai.com/v1";
Console.WriteLine($" * Connecting to OpenAI endpoint (OPENAI_ENDPOINT): {oaiEndpoint}");
Console.WriteLine($" * Using API key (OPENAI_API_KEY): {oaiApiKey[..5]}**");
foreach (string environmentVariableKey in new List<string> { "AZURE_OPENAI_DEPLOYMENT", "OPENAI_MODEL" })
{
string? value = Environment.GetEnvironmentVariable(environmentVariableKey);
if (!string.IsNullOrEmpty(value))
{
Console.WriteLine($"Using model/deployment from variable {environmentVariableKey}: {value}");
return value;
}
}

OpenAIClient aoaiClient = new(new ApiKeyCredential(oaiApiKey));
return aoaiClient.GetRealtimeConversationClient("gpt-4o-realtime-preview-2024-10-01");
throw new InvalidOperationException(
$"No model or deployment name configured. Please provide one of:\n"
+ " - AZURE_OPENAI_DEPLOYMENT\n"
+ " - OPENAI_MODEL");
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@
</PropertyGroup>

<ItemGroup>
<PackageReference Include="Azure.AI.OpenAI" Version="2.1.0-beta.2" />
<PackageReference Include="Azure.Identity" Version="1.13.1" />
<PackageReference Include="Azure.AI.OpenAI" Version="2.3.0-beta.1" />
<PackageReference Include="Azure.Identity" Version="1.15.0" />
<PackageReference Include="NAudio" Version="2.2.1" />
</ItemGroup>

Expand Down