Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions dotnet/samples/console-from-mic/Program.cs
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,12 @@ public class Program
{
public static async Task Main(string[] args)
{
        // Simpler, more readable version of the Realtime API usage,
        // decomposed into smaller functions for readability.
        // The ClientProvider was split into a separate class file for better organization.
        // The updated version of the code is in the RealtimeChatVoiceExecutor_V2.cs file.
await RealtimeChatVoiceExecutor_V2.Execute();

// First, we create a client according to configured environment variables (see end of file) and then start
// a new conversation session.
RealtimeConversationClient client = GetConfiguredClient();
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;

/// <summary>
/// Lazily reads and caches well-known environment variables used by the sample.
/// The variable names come from the <see cref="Wellknown"/> constants class.
/// </summary>
public static class EnvironmentWellKnown
{
    // Environment.GetEnvironmentVariable returns null when a variable is unset,
    // so every property here is declared nullable (the original returned
    // non-nullable string, producing a possible-null-return warning, CS8603).
    // Callers (e.g. RealtimeClientProvider) already bind these to string? locals.
    //
    // NOTE(review): because of ??=, an unset variable is re-read on every access,
    // and a value set after the first successful read is never picked up — this
    // caching is best-effort only.

    private static string? _deploymentName;

    /// <summary>Azure OpenAI deployment name, or null when not configured.</summary>
    public static string? DeploymentName => _deploymentName ??= Environment.GetEnvironmentVariable(Wellknown.AzureOpenAIDeployment);

    private static string? _endpoint;

    /// <summary>Azure OpenAI endpoint URL, or null when not configured.</summary>
    public static string? Endpoint => _endpoint ??= Environment.GetEnvironmentVariable(Wellknown.AzureOpenAIEndpoint);

    private static string? _apiKey;

    /// <summary>Azure OpenAI API key, or null when not configured.</summary>
    public static string? ApiKey => _apiKey ??= Environment.GetEnvironmentVariable(Wellknown.AzureOpenAIApiKey);

    private static string? _bingApiKey;

    /// <summary>Bing API key, or null when not configured.</summary>
    public static string? BingApiKey => _bingApiKey ??= Environment.GetEnvironmentVariable(Wellknown.BingApiKey);

    private static string? _openAIApiKey;

    /// <summary>OpenAI API key, or null when not configured.</summary>
    public static string? OpenAIApiKey => _openAIApiKey ??= Environment.GetEnvironmentVariable(Wellknown.OpenAIApiKey);
}
37 changes: 37 additions & 0 deletions dotnet/samples/console-from-mic/RealtimeChatVoice_V2/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
# Description of Changes for Clarity

The original code has been refactored into a more organized and modular structure for improved readability, maintainability, and separation of concerns. Below is a summary of the changes made:

### 1. **Environment Configuration Handling**
- Created a new class `EnvironmentWellKnown` to encapsulate the logic of fetching environment variables. This reduces redundancy and centralizes environment-related logic.
- Defined constants in a separate class `Wellknown` to keep frequently used string values in a single location. This minimizes the chances of typos and makes changes easier.

### 2. **Client Provider Class**
- Extracted the logic for client configuration into a new static class `RealtimeClientProvider`.
- Provides methods to create configured instances of `RealtimeConversationClient`.
- This reduces complexity in the main class and provides a cleaner way to manage client creation logic.

### 3. **Main Executor Class**
- Created a new class `RealtimeChatVoiceExecutor_V2` which contains the main execution logic.
- Refactored the execution flow into smaller methods to promote modularity and enhance readability:
- `Execute` method as the main entry point.
- `ConfigureSessionAsync` for session configuration.
- `ConfigureFinishTool` to set up the conversation finish tool.
- `ProcessSessionUpdatesAsync` to handle updates in an organized way.

### 4. **Update Handling and Modularity**
- Split the handling of session updates into dedicated methods:
- `HandleSessionStarted`, `HandleSpeechStarted`, `HandleSpeechFinished`, `HandleInputTranscription`, `HandleAudioDelta`, `HandleOutputTranscription`, `HandleItemFinished`, and `HandleError`.
- This makes each update type easier to understand and maintain, as each handler is responsible for only one type of update.

### 5. **Improved Code Organization**
- Each class is placed in a separate file, following the Single Responsibility Principle (SRP) and improving overall project structure.
- Static classes are used where appropriate to ensure helper methods and shared configurations are available without needing instance management.

### 6. **Logging and Error Handling**
- Enhanced logging to provide better insight into the process flow.
- Added exception handling to provide fallback behavior for potential failures (e.g., microphone input failure).

### 7. **Code Cleanup**
- Removed redundant comments and excessive inline documentation for more concise code.
- Used meaningful method and variable names to convey purpose, reducing the need for extensive comments.
Original file line number Diff line number Diff line change
@@ -0,0 +1,159 @@
using Azure.AI.OpenAI;
using Azure.Identity;
using OpenAI;
using OpenAI.RealtimeConversation;
using System.ClientModel;

#pragma warning disable OPENAI002

/// <summary>
/// Refactored ("V2") entry point for the realtime voice chat sample: creates a
/// conversation session, registers a "finish conversation" tool, and dispatches
/// streamed session updates to small single-purpose handlers.
/// </summary>
public static class RealtimeChatVoiceExecutor_V2
{
    /// <summary>
    /// Main flow: build the client and session, configure them, then process
    /// updates until the finish tool is invoked or an error update arrives.
    /// </summary>
    public static async Task Execute()
    {
        RealtimeConversationClient client = RealtimeClientProvider.GetConfiguredClient();
        using RealtimeConversationSession session = await client.StartConversationSessionAsync();

        ConversationFunctionTool finishConversationTool = ConfigureFinishTool();
        await ConfigureSessionAsync(session, finishConversationTool);

        // NOTE(review): if SpeakerOutput owns audio resources and implements
        // IDisposable, it should be wrapped in a using — confirm its contract.
        SpeakerOutput speakerOutput = new();

        await ProcessSessionUpdatesAsync(session, finishConversationTool, speakerOutput);
    }

    /// <summary>
    /// Configures the session: registers the finish tool and enables input
    /// transcription using the Whisper model named in <see cref="Wellknown"/>.
    /// </summary>
    private static async Task ConfigureSessionAsync(RealtimeConversationSession session, ConversationFunctionTool finishConversationTool)
    {
        await session.ConfigureSessionAsync(new ConversationSessionOptions()
        {
            Tools = { finishConversationTool },
            InputTranscriptionOptions = new()
            {
                Model = Wellknown.WhisperModel,
            },
        });
    }

    /// <summary>
    /// Builds the tool the model invokes when the user indicates the
    /// conversation should end. Takes no arguments (empty JSON schema "{}").
    /// </summary>
    private static ConversationFunctionTool ConfigureFinishTool()
    {
        return new ConversationFunctionTool()
        {
            Name = Wellknown.FinishConversationToolName,
            Description = "Invoked when the user says goodbye, expresses being finished, or otherwise seems to want to stop the interaction.",
            Parameters = BinaryData.FromString("{}")
        };
    }

    /// <summary>
    /// Receives streamed updates from the session and routes each one to its
    /// dedicated handler. Returns (ending the conversation) when the finish
    /// tool fires or when an error update is received.
    /// </summary>
    private static async Task ProcessSessionUpdatesAsync(RealtimeConversationSession session, ConversationFunctionTool finishConversationTool, SpeakerOutput speakerOutput)
    {
        // NOTE(review): case order matters if any of these update types share a
        // subtype relationship — preserving the original order here.
        await foreach (ConversationUpdate update in session.ReceiveUpdatesAsync())
        {
            switch (update)
            {
                case ConversationSessionStartedUpdate:
                    HandleSessionStarted(session);
                    break;

                case ConversationInputSpeechStartedUpdate:
                    HandleSpeechStarted(speakerOutput);
                    break;

                case ConversationInputSpeechFinishedUpdate:
                    HandleSpeechFinished();
                    break;

                case ConversationInputTranscriptionFinishedUpdate transcriptionFinishedUpdate:
                    HandleInputTranscription(transcriptionFinishedUpdate);
                    break;

                case ConversationAudioDeltaUpdate audioDeltaUpdate:
                    HandleAudioDelta(audioDeltaUpdate, speakerOutput);
                    break;

                case ConversationOutputTranscriptionDeltaUpdate outputTranscriptionDeltaUpdate:
                    HandleOutputTranscription(outputTranscriptionDeltaUpdate);
                    break;

                case ConversationItemFinishedUpdate itemFinishedUpdate:
                    // A finished item may be the finish-tool invocation; if so,
                    // stop processing updates entirely.
                    if (HandleItemFinished(itemFinishedUpdate, finishConversationTool))
                    {
                        return; // End the conversation
                    }
                    break;

                case ConversationErrorUpdate errorUpdate:
                    HandleError(errorUpdate);
                    return;

                default:
                    // Any update type not matched above is logged and ignored.
                    Console.WriteLine("Unhandled update type.");
                    break;
            }
        }
    }

    /// <summary>
    /// On session start, launches a fire-and-forget background task that streams
    /// microphone audio into the session. Exceptions inside the task are caught
    /// and logged so a microphone failure cannot crash the update loop.
    /// </summary>
    private static void HandleSessionStarted(RealtimeConversationSession session)
    {
        Console.WriteLine(" <<< Connected: session started");

        // Discarded task: SendAudioAsync runs for the session's lifetime while
        // the caller returns to the update loop immediately.
        _ = Task.Run(async () =>
        {
            try
            {
                using MicrophoneAudioStream microphoneInput = MicrophoneAudioStream.Start();
                Console.WriteLine(" >>> Listening to microphone input");
                Console.WriteLine(" >>> (Just tell the app you're done to finish)");
                Console.WriteLine();
                await session.SendAudioAsync(microphoneInput);
            }
            catch (Exception ex)
            {
                Console.WriteLine($"Microphone input failed: {ex.Message}");
            }
        });
    }

    /// <summary>
    /// User started speaking: clear any queued assistant audio so playback does
    /// not talk over the user.
    /// </summary>
    private static void HandleSpeechStarted(SpeakerOutput speakerOutput)
    {
        Console.WriteLine(" <<< Start of speech detected");
        speakerOutput.ClearPlayback();
    }

    /// <summary>Logs that the user stopped speaking.</summary>
    private static void HandleSpeechFinished()
    {
        Console.WriteLine(" <<< End of speech detected");
    }

    /// <summary>Prints the completed transcription of the user's speech.</summary>
    private static void HandleInputTranscription(ConversationInputTranscriptionFinishedUpdate transcriptionFinishedUpdate)
    {
        Console.WriteLine($" >>> USER: {transcriptionFinishedUpdate.Transcript}");
    }

    /// <summary>Queues a chunk of assistant audio for playback.</summary>
    private static void HandleAudioDelta(ConversationAudioDeltaUpdate audioDeltaUpdate, SpeakerOutput speakerOutput)
    {
        speakerOutput.EnqueueForPlayback(audioDeltaUpdate.Delta);
    }

    /// <summary>
    /// Prints a chunk of the assistant's output transcription (no newline:
    /// deltas stream in as partial text).
    /// </summary>
    private static void HandleOutputTranscription(ConversationOutputTranscriptionDeltaUpdate outputTranscriptionDeltaUpdate)
    {
        Console.Write(outputTranscriptionDeltaUpdate.Delta);
    }

    /// <summary>
    /// Checks whether a finished conversation item is an invocation of the
    /// finish tool. Returns true when the conversation should end.
    /// </summary>
    private static bool HandleItemFinished(ConversationItemFinishedUpdate itemFinishedUpdate, ConversationFunctionTool finishConversationTool)
    {
        Console.WriteLine();
        if (itemFinishedUpdate.FunctionName == finishConversationTool.Name)
        {
            Console.WriteLine(" <<< Finish tool invoked -- ending conversation!");
            return true;
        }
        return false;
    }

    /// <summary>
    /// Logs an error update's message plus the raw payload for diagnostics.
    /// The caller terminates the update loop after this.
    /// </summary>
    private static void HandleError(ConversationErrorUpdate errorUpdate)
    {
        Console.WriteLine();
        Console.WriteLine($" <<< ERROR: {errorUpdate.ErrorMessage}");
        Console.WriteLine(errorUpdate.GetRawContent().ToString());
    }

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
using Azure.AI.OpenAI;
using Azure.Identity;
using OpenAI;
using OpenAI.RealtimeConversation;
using System.ClientModel;

#pragma warning disable OPENAI002
/// <summary>
/// Creates configured <see cref="RealtimeConversationClient"/> instances from
/// environment settings (see <see cref="Wellknown"/> for the variable names).
/// </summary>
public static class RealtimeClientProvider
{
    /// <summary>
    /// Builds a client based on environment configuration, with this precedence:
    /// Azure OpenAI + Entra auth, then Azure OpenAI + API key, then plain
    /// OpenAI + API key. Throws <see cref="InvalidOperationException"/> when the
    /// configuration is absent or incomplete.
    /// </summary>
    public static RealtimeConversationClient GetConfiguredClient()
    {
        string? aoaiEndpoint = EnvironmentWellKnown.Endpoint;
        string? aoaiUseEntra = Environment.GetEnvironmentVariable(Wellknown.AzureOpenAIUseEntra);
        string? aoaiDeployment = EnvironmentWellKnown.DeploymentName;
        string? aoaiApiKey = EnvironmentWellKnown.ApiKey;
        string? oaiApiKey = EnvironmentWellKnown.OpenAIApiKey;

        if (aoaiEndpoint is not null && bool.TryParse(aoaiUseEntra, out bool useEntra) && useEntra)
        {
            return GetConfiguredClientForAzureOpenAIWithEntra(aoaiEndpoint, aoaiDeployment);
        }

        if (aoaiEndpoint is not null && aoaiApiKey is not null)
        {
            return GetConfiguredClientForAzureOpenAIWithKey(aoaiEndpoint, aoaiDeployment, aoaiApiKey);
        }

        if (aoaiEndpoint is not null)
        {
            // Fix: the message now names the variables the code actually reads (the
            // Wellknown constants are EASTUS2-specific), instead of the hard-coded
            // generic names (AZURE_OPENAI_ENDPOINT, ...) the original printed.
            throw new InvalidOperationException(
                $"{Wellknown.AzureOpenAIEndpoint} configured without {Wellknown.AzureOpenAIUseEntra}=true or {Wellknown.AzureOpenAIApiKey}.");
        }

        if (oaiApiKey is not null)
        {
            return GetConfiguredClientForOpenAIWithKey(oaiApiKey);
        }

        throw new InvalidOperationException(
            "No environment configuration present. Please provide one of:\n"
            + $" - {Wellknown.AzureOpenAIEndpoint} with {Wellknown.AzureOpenAIUseEntra}=true or {Wellknown.AzureOpenAIApiKey}\n"
            + $" - {Wellknown.OpenAIApiKey}");
    }

    /// <summary>
    /// Masks an API key for console display, showing at most its first five
    /// characters. Unlike the original key[..5], this does not throw
    /// ArgumentOutOfRangeException when the key is shorter than five characters.
    /// </summary>
    private static string MaskApiKey(string apiKey) =>
        apiKey.Length > 5 ? $"{apiKey[..5]}**" : "**";

    /// <summary>Builds an Azure OpenAI client authenticated via Entra (DefaultAzureCredential).</summary>
    private static RealtimeConversationClient GetConfiguredClientForAzureOpenAIWithEntra(
        string aoaiEndpoint,
        string? aoaiDeployment)
    {
        Console.WriteLine($" * Connecting to Azure OpenAI endpoint ({Wellknown.AzureOpenAIEndpoint}): {aoaiEndpoint}");
        Console.WriteLine($" * Using Entra token-based authentication ({Wellknown.AzureOpenAIUseEntra})");
        Console.WriteLine(string.IsNullOrEmpty(aoaiDeployment)
            ? $" * Using no deployment ({Wellknown.AzureOpenAIDeployment})"
            : $" * Using deployment ({Wellknown.AzureOpenAIDeployment}): {aoaiDeployment}");

        AzureOpenAIClient aoaiClient = new(new Uri(aoaiEndpoint), new DefaultAzureCredential());
        return aoaiClient.GetRealtimeConversationClient(aoaiDeployment);
    }

    /// <summary>Builds an Azure OpenAI client authenticated with an API key.</summary>
    private static RealtimeConversationClient GetConfiguredClientForAzureOpenAIWithKey(
        string aoaiEndpoint,
        string? aoaiDeployment,
        string aoaiApiKey)
    {
        Console.WriteLine($" * Connecting to Azure OpenAI endpoint ({Wellknown.AzureOpenAIEndpoint}): {aoaiEndpoint}");
        Console.WriteLine($" * Using API key ({Wellknown.AzureOpenAIApiKey}): {MaskApiKey(aoaiApiKey)}");
        Console.WriteLine(string.IsNullOrEmpty(aoaiDeployment)
            ? $" * Using no deployment ({Wellknown.AzureOpenAIDeployment})"
            : $" * Using deployment ({Wellknown.AzureOpenAIDeployment}): {aoaiDeployment}");

        AzureOpenAIClient aoaiClient = new(new Uri(aoaiEndpoint), new ApiKeyCredential(aoaiApiKey));
        return aoaiClient.GetRealtimeConversationClient(aoaiDeployment);
    }

    /// <summary>Builds a client for the public OpenAI endpoint with an API key.</summary>
    private static RealtimeConversationClient GetConfiguredClientForOpenAIWithKey(string oaiApiKey)
    {
        // Wellknown.OpenAIEndpoint is the literal base URL, not an env var name,
        // so the message no longer implies an OPENAI_ENDPOINT variable is read.
        string oaiEndpoint = Wellknown.OpenAIEndpoint;
        Console.WriteLine($" * Connecting to OpenAI endpoint: {oaiEndpoint}");
        Console.WriteLine($" * Using API key ({Wellknown.OpenAIApiKey}): {MaskApiKey(oaiApiKey)}");

        // NOTE(review): the realtime model name is hard-coded here; consider
        // promoting it to a Wellknown constant or an environment variable.
        OpenAIClient oaiClient = new(new ApiKeyCredential(oaiApiKey));
        return oaiClient.GetRealtimeConversationClient("gpt-4o-realtime-preview-2024-10-01");
    }
}
18 changes: 18 additions & 0 deletions dotnet/samples/console-from-mic/RealtimeChatVoice_V2/Wellknown.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;

/// <summary>
/// Central collection of well-known constant strings (environment variable
/// names, tool and model identifiers) so each is defined in exactly one place.
/// </summary>
public static class Wellknown
{
    // Model used for input audio transcription.
    public const string WhisperModel = "whisper-1";

    // Name of the conversation tool the model invokes when the user wants to stop.
    public const string FinishConversationToolName = "user_wants_to_finish_conversation";

    // NOTE(review): the member names below read as generic "AzureOpenAI..." but
    // the actual variable names are EASTUS2-specific (AZURE_OPENAI_EASTUS2_*),
    // while console/error messages elsewhere in this sample mention the generic
    // AZURE_OPENAI_* names — confirm which variable names are intended.
    public const string AzureOpenAIEndpoint = "AZURE_OPENAI_EASTUS2_ENDPOINT";
    public const string AzureOpenAIUseEntra = "AZURE_OPENAI_USE_ENTRA";
    public const string AzureOpenAIDeployment = "AZURE_OPENAI_EASTUS2_DEPLOYMENT";
    public const string AzureOpenAIApiKey = "AZURE_OPENAI_EASTUS2_API_KEY";

    // Base URL of the public OpenAI API (a literal URL, not an env var name).
    public const string OpenAIEndpoint = "https://api.openai.com/v1";
    public const string OpenAIApiKey = "OPENAI_API_KEY";

    // NOTE(review): mixed naming style ("Bing_ApiKey") vs the SCREAMING_CASE
    // variables above — verify this matches the actual environment variable.
    public const string BingApiKey = "Bing_ApiKey";
}