Add auto language detection, multi-language, and temperature support to the C# SDK (#397)

selenayang888 · web-flow · commit 29747507ed48 · 2026-01-28T10:19:54.000-08:00
* Add auto language detection and multi-language support to the C# SDK

* Modify the audio input to match the chat completion input.

* Clean up the code
diff --git a/sdk_v2/cs/src/OpenAI/AudioClient.cs b/sdk_v2/cs/src/OpenAI/AudioClient.cs
@@ -32,6 +32,20 @@ internal OpenAIAudioClient(string modelId)
         _modelId = modelId;
     }
 
+    /// <summary>
+    /// Settings that are supported by Foundry Local
+    /// </summary>
+    public record AudioSettings
+    {
+        public string? Language { get; set; }
+        public float? Temperature { get; set; }
+    }
+
+    /// <summary>
+    /// Settings to use for audio transcriptions using this client.
+    /// </summary>
+    public AudioSettings Settings { get; } = new();
+
     /// <summary>
     /// Transcribe audio from a file.
     /// </summary>
@@ -74,11 +88,8 @@ public async IAsyncEnumerable<AudioCreateTranscriptionResponse> TranscribeAudioS
     private async Task<AudioCreateTranscriptionResponse> TranscribeAudioImplAsync(string audioFilePath,
                                                                                   CancellationToken? ct)
     {
-        var openaiRequest = new AudioCreateTranscriptionRequest
-        {
-            Model = _modelId,
-            FileName = audioFilePath
-        };
+        var openaiRequest = AudioTranscriptionCreateRequestExtended.FromUserInput(_modelId, audioFilePath, Settings);
+
 
         var request = new CoreInteropRequest
         {
@@ -100,11 +111,7 @@ private async Task<AudioCreateTranscriptionResponse> TranscribeAudioImplAsync(st
     private async IAsyncEnumerable<AudioCreateTranscriptionResponse> TranscribeAudioStreamingImplAsync(
         string audioFilePath, [EnumeratorCancellation] CancellationToken ct)
     {
-        var openaiRequest = new AudioCreateTranscriptionRequest
-        {
-            Model = _modelId,
-            FileName = audioFilePath
-        };
+        var openaiRequest = AudioTranscriptionCreateRequestExtended.FromUserInput(_modelId, audioFilePath, Settings);
 
         var request = new CoreInteropRequest
         {
diff --git a/sdk_v2/cs/src/OpenAI/AudioTranscriptionRequestResponseTypes.cs b/sdk_v2/cs/src/OpenAI/AudioTranscriptionRequestResponseTypes.cs
@@ -6,7 +6,9 @@
 
 namespace Microsoft.AI.Foundry.Local.OpenAI;
 
+using System.Globalization;
 using System.Text.Json;
+using System.Text.Json.Serialization;
 
 using Betalgo.Ranul.OpenAI.ObjectModels.RequestModels;
 using Betalgo.Ranul.OpenAI.ObjectModels.ResponseModels;
@@ -16,6 +18,49 @@ namespace Microsoft.AI.Foundry.Local.OpenAI;
 
 using Microsoft.Extensions.Logging;
 
+internal record AudioTranscriptionCreateRequestExtended : AudioCreateTranscriptionRequest
+{
+    // Valid entries (all values are stored as strings):
+    // string language
+    // float temperature (formatted with the invariant culture)
+    [JsonPropertyName("metadata")]
+    public Dictionary<string, string>? Metadata { get; set; }
+
+    internal static AudioTranscriptionCreateRequestExtended FromUserInput(string modelId,
+                                                                      string audioFilePath,
+                                                                      OpenAIAudioClient.AudioSettings settings)
+    {
+        var request = new AudioTranscriptionCreateRequestExtended
+        {
+            Model = modelId,
+            FileName = audioFilePath,
+
+            // apply our specific settings
+            Language = settings.Language,
+            Temperature = settings.Temperature
+        };
+
+        var metadata = new Dictionary<string, string>();
+
+        if (settings.Language != null)
+        {
+            metadata["language"] = settings.Language;
+        }
+
+        if (settings.Temperature.HasValue)
+        {
+            metadata["temperature"] = settings.Temperature.Value.ToString(CultureInfo.InvariantCulture);
+        }
+
+        if (metadata.Count > 0)
+        {
+            request.Metadata = metadata;
+        }
+
+
+        return request;
+    }
+}
 internal static class AudioTranscriptionRequestResponseExtensions
 {
     internal static string ToJson(this AudioCreateTranscriptionRequest request)
diff --git a/sdk_v2/cs/test/FoundryLocal.Tests/AudioClientTests.cs b/sdk_v2/cs/test/FoundryLocal.Tests/AudioClientTests.cs
@@ -9,6 +9,7 @@ namespace Microsoft.AI.Foundry.Local.Tests;
 using System.Text;
 using System.Threading.Tasks;
 
+
 internal sealed class AudioClientTests
 {
     private static Model? model;
@@ -33,6 +34,7 @@ public async Task AudioTranscription_NoStreaming_Succeeds()
         var audioClient = await model!.GetAudioClientAsync();
         await Assert.That(audioClient).IsNotNull();
 
+        audioClient.Settings.Language = "en";
 
         var audioFilePath = Path.Combine(AppContext.BaseDirectory, "testdata/Recording.mp3");
 
@@ -45,12 +47,33 @@ public async Task AudioTranscription_NoStreaming_Succeeds()
         Console.WriteLine($"Response: {content}");
     }
 
+    [Test]
+    public async Task AudioTranscription_NoStreaming_Succeeds_WithTemperature()
+    {
+        var audioClient = await model!.GetAudioClientAsync();
+        await Assert.That(audioClient).IsNotNull();
+
+        audioClient.Settings.Language = "en";
+        audioClient.Settings.Temperature = 0.1f; // for deterministic results
+
+        var audioFilePath = "testdata/Recording.mp3";
+
+        var response = await audioClient.TranscribeAudioAsync(audioFilePath).ConfigureAwait(false);
+
+        await Assert.That(response).IsNotNull();
+        await Assert.That(response.Text).IsNotNull().And.IsNotEmpty();
+        var content = response.Text;
+        await Assert.That(content).IsEqualTo(" And lots of times you need to give people more than one link at a time. You a band could give their fans a couple new videos from the live concert behind the scenes photo gallery and album to purchase like these next few links.");
+        Console.WriteLine($"Response: {content}");
+    }
+
     [Test]
     public async Task AudioTranscription_Streaming_Succeeds()
     {
         var audioClient = await model!.GetAudioClientAsync();
         await Assert.That(audioClient).IsNotNull();
 
+        audioClient.Settings.Language = "en";
 
         var audioFilePath = Path.Combine(AppContext.BaseDirectory, "testdata/Recording.mp3");
 
@@ -70,5 +93,34 @@ public async Task AudioTranscription_Streaming_Succeeds()
         await Assert.That(fullResponse).IsEqualTo(" And lots of times you need to give people more than one link at a time. You a band could give their fans a couple new videos from the live concert behind the scenes photo gallery and album to purchase like these next few links.");
 
 
+    }
+
+    [Test]
+    public async Task AudioTranscription_Streaming_Succeeds_WithTemperature()
+    {
+        var audioClient = await model!.GetAudioClientAsync();
+        await Assert.That(audioClient).IsNotNull();
+
+        audioClient.Settings.Language = "en";
+        audioClient.Settings.Temperature = 0.1f; // for deterministic results
+
+        var audioFilePath = "testdata/Recording.mp3";
+
+        var updates = audioClient.TranscribeAudioStreamingAsync(audioFilePath, CancellationToken.None).ConfigureAwait(false);
+
+        StringBuilder responseMessage = new();
+        await foreach (var response in updates)
+        {
+            await Assert.That(response).IsNotNull();
+            await Assert.That(response.Text).IsNotNull().And.IsNotEmpty();
+            var content = response.Text;
+            responseMessage.Append(content);
+        }
+
+        var fullResponse = responseMessage.ToString();
+        Console.WriteLine(fullResponse);
+        await Assert.That(fullResponse).IsEqualTo(" And lots of times you need to give people more than one link at a time. You a band could give their fans a couple new videos from the live concert behind the scenes photo gallery and album to purchase like these next few links.");
+
+
     }
 }