Skip to content

Commit 2974750

Browse files
Add auto language detection and multi-language with temperature support into C# SDK (#397)
* Add auto language detection and multi-language support into C# SDK * Modify the audio input to match chat completion input. * Clean the code
1 parent c30cec2 commit 2974750

File tree

3 files changed

+114
-10
lines changed

3 files changed

+114
-10
lines changed

sdk_v2/cs/src/OpenAI/AudioClient.cs

Lines changed: 17 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,20 @@ internal OpenAIAudioClient(string modelId)
3232
_modelId = modelId;
3333
}
3434

35+
/// <summary>
36+
/// Settings that are supported by Foundry Local
37+
/// </summary>
38+
public record AudioSettings
39+
{
/// <summary>
/// Language to use for transcription (the tests in this change pass "en").
/// When null, no "language" entry is added to the request metadata
/// (presumably leaving language detection to the service; confirm).
/// </summary>
40+
public string? Language { get; set; }
/// <summary>
/// Sampling temperature for transcription. When it has a value it is copied to the
/// request and also written to the request metadata as an invariant-culture string.
/// </summary>
41+
public float? Temperature { get; set; }
42+
}
43+
44+
/// <summary>
45+
/// Settings to use for audio transcriptions using this client.
46+
/// </summary>
47+
public AudioSettings Settings { get; } = new();
48+
3549
/// <summary>
3650
/// Transcribe audio from a file.
3751
/// </summary>
@@ -74,11 +88,8 @@ public async IAsyncEnumerable<AudioCreateTranscriptionResponse> TranscribeAudioS
7488
private async Task<AudioCreateTranscriptionResponse> TranscribeAudioImplAsync(string audioFilePath,
7589
CancellationToken? ct)
7690
{
77-
var openaiRequest = new AudioCreateTranscriptionRequest
78-
{
79-
Model = _modelId,
80-
FileName = audioFilePath
81-
};
91+
var openaiRequest = AudioTranscriptionCreateRequestExtended.FromUserInput(_modelId, audioFilePath, Settings);
92+
8293

8394
var request = new CoreInteropRequest
8495
{
@@ -100,11 +111,7 @@ private async Task<AudioCreateTranscriptionResponse> TranscribeAudioImplAsync(st
100111
private async IAsyncEnumerable<AudioCreateTranscriptionResponse> TranscribeAudioStreamingImplAsync(
101112
string audioFilePath, [EnumeratorCancellation] CancellationToken ct)
102113
{
103-
var openaiRequest = new AudioCreateTranscriptionRequest
104-
{
105-
Model = _modelId,
106-
FileName = audioFilePath
107-
};
114+
var openaiRequest = AudioTranscriptionCreateRequestExtended.FromUserInput(_modelId, audioFilePath, Settings);
108115

109116
var request = new CoreInteropRequest
110117
{

sdk_v2/cs/src/OpenAI/AudioTranscriptionRequestResponseTypes.cs

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,9 @@
66

77
namespace Microsoft.AI.Foundry.Local.OpenAI;
88

9+
using System.Globalization;
910
using System.Text.Json;
11+
using System.Text.Json.Serialization;
1012

1113
using Betalgo.Ranul.OpenAI.ObjectModels.RequestModels;
1214
using Betalgo.Ranul.OpenAI.ObjectModels.ResponseModels;
@@ -16,6 +18,49 @@ namespace Microsoft.AI.Foundry.Local.OpenAI;
1618

1719
using Microsoft.Extensions.Logging;
1820

21+
/// <summary>
/// Transcription request that extends the base OpenAI request model with a
/// <c>metadata</c> dictionary of string key/value pairs.
/// </summary>
internal record AudioTranscriptionCreateRequestExtended : AudioCreateTranscriptionRequest
{
    /// <summary>
    /// Optional extra entries serialized under the JSON name <c>metadata</c>.
    /// Keys written by <see cref="FromUserInput"/>:
    ///   "language"    - the language string taken from the settings
    ///   "temperature" - the temperature formatted with the invariant culture
    /// </summary>
    [JsonPropertyName("metadata")]
    public Dictionary<string, string>? Metadata { get; set; }

    /// <summary>
    /// Builds a request for the given model and audio file, copying the user's settings
    /// onto both the typed request properties and the metadata dictionary.
    /// </summary>
    /// <param name="modelId">Model identifier assigned to the request's Model property.</param>
    /// <param name="audioFilePath">Audio file path assigned to the request's FileName property.</param>
    /// <param name="settings">User-supplied language and temperature settings.</param>
    /// <returns>The populated request; Metadata stays null when no setting was provided.</returns>
    internal static AudioTranscriptionCreateRequestExtended FromUserInput(string modelId,
                                                                          string audioFilePath,
                                                                          OpenAIAudioClient.AudioSettings settings)
    {
        // Collect only the settings the user actually provided.
        var entries = new Dictionary<string, string>();

        if (settings.Language is { } language)
        {
            entries["language"] = language;
        }

        if (settings.Temperature is { } temperature)
        {
            // Invariant culture keeps the decimal separator stable regardless of the host locale.
            entries["temperature"] = temperature.ToString(CultureInfo.InvariantCulture);
        }

        return new AudioTranscriptionCreateRequestExtended
        {
            Model = modelId,
            FileName = audioFilePath,

            // apply our specific settings
            Language = settings.Language,
            Temperature = settings.Temperature,

            // Leave Metadata unset (null) when there is nothing to send.
            Metadata = entries.Count > 0 ? entries : null,
        };
    }
}
1964
internal static class AudioTranscriptionRequestResponseExtensions
2065
{
2166
internal static string ToJson(this AudioCreateTranscriptionRequest request)

sdk_v2/cs/test/FoundryLocal.Tests/AudioClientTests.cs

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ namespace Microsoft.AI.Foundry.Local.Tests;
99
using System.Text;
1010
using System.Threading.Tasks;
1111

12+
1213
internal sealed class AudioClientTests
1314
{
1415
private static Model? model;
@@ -33,6 +34,7 @@ public async Task AudioTranscription_NoStreaming_Succeeds()
3334
var audioClient = await model!.GetAudioClientAsync();
3435
await Assert.That(audioClient).IsNotNull();
3536

37+
audioClient.Settings.Language = "en";
3638

3739
var audioFilePath = Path.Combine(AppContext.BaseDirectory, "testdata/Recording.mp3");
3840

@@ -45,12 +47,33 @@ public async Task AudioTranscription_NoStreaming_Succeeds()
4547
Console.WriteLine($"Response: {content}");
4648
}
4749

50+
[Test]
public async Task AudioTranscription_NoStreaming_Succeeds_WithTemperature()
{
    // Verifies non-streaming transcription when both a language and a temperature are set.
    var audioClient = await model!.GetAudioClientAsync();
    await Assert.That(audioClient).IsNotNull();

    audioClient.Settings.Language = "en";
    audioClient.Settings.Temperature = 0.1f; // for deterministic results

    // Resolve against the test output directory, as the sibling tests do, so the test
    // does not depend on the process working directory.
    var audioFilePath = Path.Combine(AppContext.BaseDirectory, "testdata/Recording.mp3");

    var response = await audioClient.TranscribeAudioAsync(audioFilePath).ConfigureAwait(false);

    await Assert.That(response).IsNotNull();
    await Assert.That(response.Text).IsNotNull().And.IsNotEmpty();
    var content = response.Text;

    // Log before the exact-match assertion so a mismatch still shows the actual text.
    Console.WriteLine($"Response: {content}");
    await Assert.That(content).IsEqualTo(" And lots of times you need to give people more than one link at a time. You a band could give their fans a couple new videos from the live concert behind the scenes photo gallery and album to purchase like these next few links.");
}
69+
4870
[Test]
4971
public async Task AudioTranscription_Streaming_Succeeds()
5072
{
5173
var audioClient = await model!.GetAudioClientAsync();
5274
await Assert.That(audioClient).IsNotNull();
5375

76+
audioClient.Settings.Language = "en";
5477

5578
var audioFilePath = Path.Combine(AppContext.BaseDirectory, "testdata/Recording.mp3");
5679

@@ -70,5 +93,34 @@ public async Task AudioTranscription_Streaming_Succeeds()
7093
await Assert.That(fullResponse).IsEqualTo(" And lots of times you need to give people more than one link at a time. You a band could give their fans a couple new videos from the live concert behind the scenes photo gallery and album to purchase like these next few links.");
7194

7295

96+
}
97+
98+
[Test]
public async Task AudioTranscription_Streaming_Succeeds_WithTemperature()
{
    // Verifies streaming transcription when both a language and a temperature are set.
    var audioClient = await model!.GetAudioClientAsync();
    await Assert.That(audioClient).IsNotNull();

    audioClient.Settings.Language = "en";
    audioClient.Settings.Temperature = 0.1f; // for deterministic results

    // Resolve against the test output directory, as the sibling tests do, so the test
    // does not depend on the process working directory.
    var audioFilePath = Path.Combine(AppContext.BaseDirectory, "testdata/Recording.mp3");

    var updates = audioClient.TranscribeAudioStreamingAsync(audioFilePath, CancellationToken.None).ConfigureAwait(false);

    // Concatenate the streamed chunks so the full transcript can be compared at the end.
    StringBuilder responseMessage = new();
    await foreach (var response in updates)
    {
        await Assert.That(response).IsNotNull();
        await Assert.That(response.Text).IsNotNull().And.IsNotEmpty();
        var content = response.Text;
        responseMessage.Append(content);
    }

    var fullResponse = responseMessage.ToString();
    Console.WriteLine(fullResponse);
    await Assert.That(fullResponse).IsEqualTo(" And lots of times you need to give people more than one link at a time. You a band could give their fans a couple new videos from the live concert behind the scenes photo gallery and album to purchase like these next few links.");
}
74126
}

0 commit comments

Comments
 (0)