diff --git a/dotnet/samples/console-from-mic/Program.cs b/dotnet/samples/console-from-mic/Program.cs
index 1f5f1ef..a75c86f 100644
--- a/dotnet/samples/console-from-mic/Program.cs
+++ b/dotnet/samples/console-from-mic/Program.cs
@@ -10,6 +10,12 @@ public class Program
 {
     public static async Task Main(string[] args)
     {
+        // Simpler, more readable version of the Realtime API usage,
+        // decomposed into smaller functions for readability.
+        // The client-provider logic was split into a separate class file for better organization.
+        // The updated version of the code is in the RealtimeChatVoiceExecutor_V2.cs file.
+        await RealtimeChatVoiceExecutor_V2.Execute();
+
         // First, we create a client according to configured environment variables (see end of file) and then start
         // a new conversation session.
         RealtimeConversationClient client = GetConfiguredClient();
diff --git a/dotnet/samples/console-from-mic/RealtimeChatVoice_V2/EnvironmentWellKnown.cs b/dotnet/samples/console-from-mic/RealtimeChatVoice_V2/EnvironmentWellKnown.cs
new file mode 100644
index 0000000..031fc62
--- /dev/null
+++ b/dotnet/samples/console-from-mic/RealtimeChatVoice_V2/EnvironmentWellKnown.cs
@@ -0,0 +1,23 @@
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+using System.Threading.Tasks;
+
+public static class EnvironmentWellKnown
+{
+    private static string? _deploymentName;
+    public static string DeploymentName => _deploymentName ??= Environment.GetEnvironmentVariable(Wellknown.AzureOpenAIDeployment);
+
+    private static string? _endpoint;
+    public static string Endpoint => _endpoint ??= Environment.GetEnvironmentVariable(Wellknown.AzureOpenAIEndpoint);
+
+    private static string? _apiKey;
+    public static string ApiKey => _apiKey ??= Environment.GetEnvironmentVariable(Wellknown.AzureOpenAIApiKey);
+
+    private static string? _bingApiKey;
+    public static string BingApiKey => _bingApiKey ??= Environment.GetEnvironmentVariable(Wellknown.BingApiKey);
+
+    private static string? _openAIApiKey;
+    public static string OpenAIApiKey => _openAIApiKey ??= Environment.GetEnvironmentVariable(Wellknown.OpenAIApiKey);
+}
diff --git a/dotnet/samples/console-from-mic/RealtimeChatVoice_V2/README.md b/dotnet/samples/console-from-mic/RealtimeChatVoice_V2/README.md
new file mode 100644
index 0000000..013851e
--- /dev/null
+++ b/dotnet/samples/console-from-mic/RealtimeChatVoice_V2/README.md
@@ -0,0 +1,37 @@
+# Description of Changes for Clarity
+
+The original code has been refactored into a more organized and modular structure for improved readability, maintainability, and separation of concerns. Below is a summary of the changes made:
+
+### 1. **Environment Configuration Handling**
+- Created a new class `EnvironmentWellKnown` to encapsulate the logic of fetching environment variables. This reduces redundancy and centralizes environment-related logic.
+- Defined constants in a separate class `Wellknown` to keep frequently used string values in a single location. This minimizes the chance of typos and makes changes easier.
+
+### 2. **Client Provider Class**
+- Extracted the logic for client configuration into a new static class `RealtimeClientProvider`.
+  - Provides methods to create configured instances of `RealtimeConversationClient`.
+  - This reduces complexity in the main class and provides a cleaner way to manage client creation logic.
+
+### 3. **Main Executor Class**
+- Created a new class `RealtimeChatVoiceExecutor_V2` that contains the main execution logic.
+- Refactored the execution flow into smaller methods to promote modularity and enhance readability:
+  - `Execute` method as the main entry point.
+  - `ConfigureSessionAsync` for session configuration.
+  - `ConfigureFinishTool` to set up the conversation finish tool.
+  - `ProcessSessionUpdatesAsync` to handle updates in an organized way.
+
+### 4. **Update Handling and Modularity**
+- Split the handling of session updates into dedicated methods:
+  - `HandleSessionStarted`, `HandleSpeechStarted`, `HandleSpeechFinished`, `HandleInputTranscription`, `HandleAudioDelta`, `HandleOutputTranscription`, `HandleItemFinished`, and `HandleError`.
+  - This makes each update type easier to understand and maintain, as each handler is responsible for only one type of update.
+
+### 5. **Improved Code Organization**
+- Each class is placed in a separate file, following the Single Responsibility Principle (SRP) and improving overall project structure.
+- Static classes are used where appropriate to ensure helper methods and shared configuration are available without needing instance management.
+
+### 6. **Logging and Error Handling**
+- Enhanced logging to provide better insight into the process flow.
+- Added exception handling to provide fallback behavior for potential failures (e.g., microphone input failure).
+
+### 7. **Code Cleanup**
+- Removed redundant comments and excessive inline documentation for more concise code.
+- Used meaningful method and variable names to convey purpose, reducing the need for extensive comments.
\ No newline at end of file
diff --git a/dotnet/samples/console-from-mic/RealtimeChatVoice_V2/RealtimeChatVoiceExecutor_V2.cs b/dotnet/samples/console-from-mic/RealtimeChatVoice_V2/RealtimeChatVoiceExecutor_V2.cs
new file mode 100644
index 0000000..9d1fc2e
--- /dev/null
+++ b/dotnet/samples/console-from-mic/RealtimeChatVoice_V2/RealtimeChatVoiceExecutor_V2.cs
@@ -0,0 +1,159 @@
+using Azure.AI.OpenAI;
+using Azure.Identity;
+using OpenAI;
+using OpenAI.RealtimeConversation;
+using System.ClientModel;
+
+#pragma warning disable OPENAI002
+
+public static class RealtimeChatVoiceExecutor_V2
+{
+    public static async Task Execute()
+    {
+        RealtimeConversationClient client = RealtimeClientProvider.GetConfiguredClient();
+        using RealtimeConversationSession session = await client.StartConversationSessionAsync();
+
+        ConversationFunctionTool finishConversationTool = ConfigureFinishTool();
+        await ConfigureSessionAsync(session, finishConversationTool);
+
+        SpeakerOutput speakerOutput = new();
+
+        await ProcessSessionUpdatesAsync(session, finishConversationTool, speakerOutput);
+    }
+
+    private static async Task ConfigureSessionAsync(RealtimeConversationSession session, ConversationFunctionTool finishConversationTool)
+    {
+        await session.ConfigureSessionAsync(new ConversationSessionOptions()
+        {
+            Tools = { finishConversationTool },
+            InputTranscriptionOptions = new()
+            {
+                Model = Wellknown.WhisperModel,
+            },
+        });
+    }
+
+    private static ConversationFunctionTool ConfigureFinishTool()
+    {
+        return new ConversationFunctionTool()
+        {
+            Name = Wellknown.FinishConversationToolName,
+            Description = "Invoked when the user says goodbye, expresses being finished, or otherwise seems to want to stop the interaction.",
+            Parameters = BinaryData.FromString("{}")
+        };
+    }
+
+    private static async Task ProcessSessionUpdatesAsync(RealtimeConversationSession session, ConversationFunctionTool finishConversationTool, SpeakerOutput speakerOutput)
+    {
+        await foreach (ConversationUpdate update in session.ReceiveUpdatesAsync())
+        {
+            switch (update)
+            {
+                case ConversationSessionStartedUpdate:
+                    HandleSessionStarted(session);
+                    break;
+
+                case ConversationInputSpeechStartedUpdate:
+                    HandleSpeechStarted(speakerOutput);
+                    break;
+
+                case ConversationInputSpeechFinishedUpdate:
+                    HandleSpeechFinished();
+                    break;
+
+                case ConversationInputTranscriptionFinishedUpdate transcriptionFinishedUpdate:
+                    HandleInputTranscription(transcriptionFinishedUpdate);
+                    break;
+
+                case ConversationAudioDeltaUpdate audioDeltaUpdate:
+                    HandleAudioDelta(audioDeltaUpdate, speakerOutput);
+                    break;
+
+                case ConversationOutputTranscriptionDeltaUpdate outputTranscriptionDeltaUpdate:
+                    HandleOutputTranscription(outputTranscriptionDeltaUpdate);
+                    break;
+
+                case ConversationItemFinishedUpdate itemFinishedUpdate:
+                    if (HandleItemFinished(itemFinishedUpdate, finishConversationTool))
+                    {
+                        return; // End the conversation
+                    }
+                    break;
+
+                case ConversationErrorUpdate errorUpdate:
+                    HandleError(errorUpdate);
+                    return;
+
+                default:
+                    Console.WriteLine("Unhandled update type.");
+                    break;
+            }
+        }
+    }
+
+    private static void HandleSessionStarted(RealtimeConversationSession session)
+    {
+        Console.WriteLine(" <<< Connected: session started");
+
+        _ = Task.Run(async () =>
+        {
+            try
+            {
+                using MicrophoneAudioStream microphoneInput = MicrophoneAudioStream.Start();
+                Console.WriteLine(" >>> Listening to microphone input");
+                Console.WriteLine(" >>> (Just tell the app you're done to finish)");
+                Console.WriteLine();
+                await session.SendAudioAsync(microphoneInput);
+            }
+            catch (Exception ex)
+            {
+                Console.WriteLine($"Microphone input failed: {ex.Message}");
+            }
+        });
+    }
+
+    private static void HandleSpeechStarted(SpeakerOutput speakerOutput)
+    {
+        Console.WriteLine(" <<< Start of speech detected");
+        speakerOutput.ClearPlayback();
+    }
+
+    private static void HandleSpeechFinished()
+    {
+        Console.WriteLine(" <<< End of speech detected");
+    }
+
+    private static void HandleInputTranscription(ConversationInputTranscriptionFinishedUpdate transcriptionFinishedUpdate)
+    {
+        Console.WriteLine($" >>> USER: {transcriptionFinishedUpdate.Transcript}");
+    }
+
+    private static void HandleAudioDelta(ConversationAudioDeltaUpdate audioDeltaUpdate, SpeakerOutput speakerOutput)
+    {
+        speakerOutput.EnqueueForPlayback(audioDeltaUpdate.Delta);
+    }
+
+    private static void HandleOutputTranscription(ConversationOutputTranscriptionDeltaUpdate outputTranscriptionDeltaUpdate)
+    {
+        Console.Write(outputTranscriptionDeltaUpdate.Delta);
+    }
+
+    private static bool HandleItemFinished(ConversationItemFinishedUpdate itemFinishedUpdate, ConversationFunctionTool finishConversationTool)
+    {
+        Console.WriteLine();
+        if (itemFinishedUpdate.FunctionName == finishConversationTool.Name)
+        {
+            Console.WriteLine(" <<< Finish tool invoked -- ending conversation!");
+            return true;
+        }
+        return false;
+    }
+
+    private static void HandleError(ConversationErrorUpdate errorUpdate)
+    {
+        Console.WriteLine();
+        Console.WriteLine($" <<< ERROR: {errorUpdate.ErrorMessage}");
+        Console.WriteLine(errorUpdate.GetRawContent().ToString());
+    }
+
+}
\ No newline at end of file
diff --git a/dotnet/samples/console-from-mic/RealtimeChatVoice_V2/RealtimeClientProvider.cs b/dotnet/samples/console-from-mic/RealtimeChatVoice_V2/RealtimeClientProvider.cs
new file mode 100644
index 0000000..56bb7e2
--- /dev/null
+++ b/dotnet/samples/console-from-mic/RealtimeChatVoice_V2/RealtimeClientProvider.cs
@@ -0,0 +1,82 @@
+using Azure.AI.OpenAI;
+using Azure.Identity;
+using OpenAI;
+using OpenAI.RealtimeConversation;
+using System.ClientModel;
+
+#pragma warning disable OPENAI002
+public static class RealtimeClientProvider
+{
+    public static RealtimeConversationClient GetConfiguredClient()
+    {
+        string? aoaiEndpoint = EnvironmentWellKnown.Endpoint;
+        string? aoaiUseEntra = Environment.GetEnvironmentVariable(Wellknown.AzureOpenAIUseEntra);
+        string? aoaiDeployment = EnvironmentWellKnown.DeploymentName;
+        string? aoaiApiKey = EnvironmentWellKnown.ApiKey;
+        string? oaiApiKey = EnvironmentWellKnown.OpenAIApiKey;
+
+        if (aoaiEndpoint is not null && bool.TryParse(aoaiUseEntra, out bool useEntra) && useEntra)
+        {
+            return GetConfiguredClientForAzureOpenAIWithEntra(aoaiEndpoint, aoaiDeployment);
+        }
+        else if (aoaiEndpoint is not null && aoaiApiKey is not null)
+        {
+            return GetConfiguredClientForAzureOpenAIWithKey(aoaiEndpoint, aoaiDeployment, aoaiApiKey);
+        }
+        else if (aoaiEndpoint is not null)
+        {
+            throw new InvalidOperationException(
+                $"AZURE_OPENAI_ENDPOINT configured without AZURE_OPENAI_USE_ENTRA=true or AZURE_OPENAI_API_KEY.");
+        }
+        else if (oaiApiKey is not null)
+        {
+            return GetConfiguredClientForOpenAIWithKey(oaiApiKey);
+        }
+        else
+        {
+            throw new InvalidOperationException(
+                $"No environment configuration present. Please provide one of:\n"
+                + " - AZURE_OPENAI_ENDPOINT with AZURE_OPENAI_USE_ENTRA=true or AZURE_OPENAI_API_KEY\n"
+                + " - OPENAI_API_KEY");
+        }
+    }
+
+    private static RealtimeConversationClient GetConfiguredClientForAzureOpenAIWithEntra(
+        string aoaiEndpoint,
+        string? aoaiDeployment)
+    {
+        Console.WriteLine($" * Connecting to Azure OpenAI endpoint (AZURE_OPENAI_ENDPOINT): {aoaiEndpoint}");
+        Console.WriteLine($" * Using Entra token-based authentication (AZURE_OPENAI_USE_ENTRA)");
+        Console.WriteLine(string.IsNullOrEmpty(aoaiDeployment)
+            ? " * Using no deployment (AZURE_OPENAI_DEPLOYMENT)"
+            : $" * Using deployment (AZURE_OPENAI_DEPLOYMENT): {aoaiDeployment}");
+
+        AzureOpenAIClient aoaiClient = new(new Uri(aoaiEndpoint), new DefaultAzureCredential());
+        return aoaiClient.GetRealtimeConversationClient(aoaiDeployment);
+    }
+
+    private static RealtimeConversationClient GetConfiguredClientForAzureOpenAIWithKey(
+        string aoaiEndpoint,
+        string? aoaiDeployment,
+        string aoaiApiKey)
+    {
+        Console.WriteLine($" * Connecting to Azure OpenAI endpoint (AZURE_OPENAI_ENDPOINT): {aoaiEndpoint}");
+        Console.WriteLine($" * Using API key (AZURE_OPENAI_API_KEY): {aoaiApiKey[..5]}**");
+        Console.WriteLine(string.IsNullOrEmpty(aoaiDeployment)
+            ? " * Using no deployment (AZURE_OPENAI_DEPLOYMENT)"
+            : $" * Using deployment (AZURE_OPENAI_DEPLOYMENT): {aoaiDeployment}");
+
+        AzureOpenAIClient aoaiClient = new(new Uri(aoaiEndpoint), new ApiKeyCredential(aoaiApiKey));
+        return aoaiClient.GetRealtimeConversationClient(aoaiDeployment);
+    }
+
+    private static RealtimeConversationClient GetConfiguredClientForOpenAIWithKey(string oaiApiKey)
+    {
+        string oaiEndpoint = Wellknown.OpenAIEndpoint;
+        Console.WriteLine($" * Connecting to OpenAI endpoint (OPENAI_ENDPOINT): {oaiEndpoint}");
+        Console.WriteLine($" * Using API key (OPENAI_API_KEY): {oaiApiKey[..5]}**");
+
+        OpenAIClient aoaiClient = new(new ApiKeyCredential(oaiApiKey));
+        return aoaiClient.GetRealtimeConversationClient("gpt-4o-realtime-preview-2024-10-01");
+    }
+}
diff --git a/dotnet/samples/console-from-mic/RealtimeChatVoice_V2/Wellknown.cs b/dotnet/samples/console-from-mic/RealtimeChatVoice_V2/Wellknown.cs
new file mode 100644
index 0000000..c072a52
--- /dev/null
+++ b/dotnet/samples/console-from-mic/RealtimeChatVoice_V2/Wellknown.cs
@@ -0,0 +1,18 @@
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+using System.Threading.Tasks;
+
+public static class Wellknown
+{
+    public const string WhisperModel = "whisper-1";
+    public const string FinishConversationToolName = "user_wants_to_finish_conversation";
+    public const string AzureOpenAIEndpoint = "AZURE_OPENAI_EASTUS2_ENDPOINT";
+    public const string AzureOpenAIUseEntra = "AZURE_OPENAI_USE_ENTRA";
+    public const string AzureOpenAIDeployment = "AZURE_OPENAI_EASTUS2_DEPLOYMENT";
+    public const string AzureOpenAIApiKey = "AZURE_OPENAI_EASTUS2_API_KEY";
+    public const string OpenAIEndpoint = "https://api.openai.com/v1";
+    public const string OpenAIApiKey = "OPENAI_API_KEY";
+    public const string BingApiKey = "Bing_ApiKey";
+}
\ No newline at end of file