Skip to content

Commit 324ea9e

Browse files
Added audio recording (#615)
1 parent 724cf55 commit 324ea9e

34 files changed

+868
-31
lines changed

app/MindWork AI Studio.sln.DotSettings

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
<s:String x:Key="/Default/CodeStyle/Naming/CSharpNaming/Abbreviations/=GWDG/@EntryIndexedValue">GWDG</s:String>
77
<s:String x:Key="/Default/CodeStyle/Naming/CSharpNaming/Abbreviations/=HF/@EntryIndexedValue">HF</s:String>
88
<s:String x:Key="/Default/CodeStyle/Naming/CSharpNaming/Abbreviations/=IERI/@EntryIndexedValue">IERI</s:String>
9+
<s:String x:Key="/Default/CodeStyle/Naming/CSharpNaming/Abbreviations/=IMIME/@EntryIndexedValue">IMIME</s:String>
910
<s:String x:Key="/Default/CodeStyle/Naming/CSharpNaming/Abbreviations/=LLM/@EntryIndexedValue">LLM</s:String>
1011
<s:String x:Key="/Default/CodeStyle/Naming/CSharpNaming/Abbreviations/=LM/@EntryIndexedValue">LM</s:String>
1112
<s:String x:Key="/Default/CodeStyle/Naming/CSharpNaming/Abbreviations/=MSG/@EntryIndexedValue">MSG</s:String>
@@ -18,10 +19,12 @@
1819
<s:String x:Key="/Default/CodeStyle/Naming/CSharpNaming/Abbreviations/=URL/@EntryIndexedValue">URL</s:String>
1920
<s:String x:Key="/Default/CodeStyle/Naming/CSharpNaming/Abbreviations/=I18N/@EntryIndexedValue">I18N</s:String>
2021
<s:Boolean x:Key="/Default/UserDictionary/Words/=agentic/@EntryIndexedValue">True</s:Boolean>
22+
<s:Boolean x:Key="/Default/UserDictionary/Words/=eri/@EntryIndexedValue">True</s:Boolean>
2123
<s:Boolean x:Key="/Default/UserDictionary/Words/=groq/@EntryIndexedValue">True</s:Boolean>
2224
<s:Boolean x:Key="/Default/UserDictionary/Words/=gwdg/@EntryIndexedValue">True</s:Boolean>
2325
<s:Boolean x:Key="/Default/UserDictionary/Words/=huggingface/@EntryIndexedValue">True</s:Boolean>
2426
<s:Boolean x:Key="/Default/UserDictionary/Words/=ieri/@EntryIndexedValue">True</s:Boolean>
27+
<s:Boolean x:Key="/Default/UserDictionary/Words/=mime/@EntryIndexedValue">True</s:Boolean>
2528
<s:Boolean x:Key="/Default/UserDictionary/Words/=mwais/@EntryIndexedValue">True</s:Boolean>
2629
<s:Boolean x:Key="/Default/UserDictionary/Words/=ollama/@EntryIndexedValue">True</s:Boolean>
2730
<s:Boolean x:Key="/Default/UserDictionary/Words/=tauri_0027s/@EntryIndexedValue">True</s:Boolean></wpf:ResourceDictionary>

app/MindWork AI Studio/Assistants/I18N/allTexts.lua

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2323,6 +2323,12 @@ UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::VISION::T428040679"] = "Content creation"
23232323
-- Useful assistants
23242324
UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::VISION::T586430036"] = "Useful assistants"
23252325

2326+
-- Stop recording and start transcription
2327+
UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::VOICERECORDER::T224155287"] = "Stop recording and start transcription"
2328+
2329+
-- Start recording your voice for a transcription
2330+
UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::VOICERECORDER::T2372624045"] = "Start recording your voice for a transcription"
2331+
23262332
-- Are you sure you want to delete the chat '{0}' in the workspace '{1}'?
23272333
UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::WORKSPACES::T1016188706"] = "Are you sure you want to delete the chat '{0}' in the workspace '{1}'?"
23282334

@@ -5368,6 +5374,9 @@ UI_TEXT_CONTENT["AISTUDIO::SETTINGS::DATAMODEL::PREVIEWFEATURESEXTENSIONS::T1848
53685374
-- Plugins: Preview of our plugin system where you can extend the functionality of the app
53695375
UI_TEXT_CONTENT["AISTUDIO::SETTINGS::DATAMODEL::PREVIEWFEATURESEXTENSIONS::T2056842933"] = "Plugins: Preview of our plugin system where you can extend the functionality of the app"
53705376

5377+
-- Speech to Text: Preview of our speech to text system where you can transcribe recordings and audio files into text
5378+
UI_TEXT_CONTENT["AISTUDIO::SETTINGS::DATAMODEL::PREVIEWFEATURESEXTENSIONS::T221133923"] = "Speech to Text: Preview of our speech to text system where you can transcribe recordings and audio files into text"
5379+
53715380
-- RAG: Preview of our RAG implementation where you can refer your files or integrate enterprise data within your company
53725381
UI_TEXT_CONTENT["AISTUDIO::SETTINGS::DATAMODEL::PREVIEWFEATURESEXTENSIONS::T2708939138"] = "RAG: Preview of our RAG implementation where you can refer your files or integrate enterprise data within your company"
53735382

app/MindWork AI Studio/Chat/IImageSourceExtensions.cs

Lines changed: 24 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
using AIStudio.Tools.MIME;
12
using AIStudio.Tools.PluginSystem;
23

34
namespace AIStudio.Chat;
@@ -6,7 +7,7 @@ public static class IImageSourceExtensions
67
{
78
private static string TB(string fallbackEN) => I18N.I.T(fallbackEN, typeof(IImageSourceExtensions).Namespace, nameof(IImageSourceExtensions));
89

9-
public static string DetermineMimeType(this IImageSource image)
10+
public static MIMEType DetermineMimeType(this IImageSource image)
1011
{
1112
switch (image.SourceType)
1213
{
@@ -18,52 +19,48 @@ public static string DetermineMimeType(this IImageSource image)
1819
{
1920
var mimeEnd = base64Data.IndexOf(';');
2021
if (mimeEnd > 5)
21-
{
22-
return base64Data[5..mimeEnd];
23-
}
22+
return Builder.FromTextRepresentation(base64Data[5..mimeEnd]);
2423
}
2524

2625
// Fallback:
27-
return "application/octet-stream";
26+
return Builder.Create().UseApplication().UseSubtype(ApplicationSubtype.OCTET_STREAM).Build();
2827
}
2928

3029
case ContentImageSource.URL:
3130
{
3231
// Try to detect the mime type from the URL extension:
3332
var uri = new Uri(image.Source);
3433
var extension = Path.GetExtension(uri.AbsolutePath).ToLowerInvariant();
35-
return extension switch
36-
{
37-
".png" => "image/png",
38-
".jpg" or ".jpeg" => "image/jpeg",
39-
".gif" => "image/gif",
40-
".bmp" => "image/bmp",
41-
".webp" => "image/webp",
42-
43-
_ => "application/octet-stream"
44-
};
34+
return DeriveMIMETypeFromExtension(extension);
4535
}
4636

4737
case ContentImageSource.LOCAL_PATH:
4838
{
4939
var extension = Path.GetExtension(image.Source).ToLowerInvariant();
50-
return extension switch
51-
{
52-
".png" => "image/png",
53-
".jpg" or ".jpeg" => "image/jpeg",
54-
".gif" => "image/gif",
55-
".bmp" => "image/bmp",
56-
".webp" => "image/webp",
57-
58-
_ => "application/octet-stream"
59-
};
40+
return DeriveMIMETypeFromExtension(extension);
6041
}
6142

6243
default:
63-
return "application/octet-stream";
44+
return Builder.Create().UseApplication().UseSubtype(ApplicationSubtype.OCTET_STREAM).Build();
6445
}
6546
}
66-
47+
48+
/// <summary>
/// Maps a file extension to the MIME type of the corresponding image format.
/// </summary>
/// <param name="extension">
/// The extension including the leading dot. The visible callers pass it
/// already lower-cased; the comparison here is case-sensitive.
/// </param>
/// <returns>
/// The image MIME type for a recognized extension; otherwise the generic
/// application/octet-stream type.
/// </returns>
// NOTE(review): the inline mapping this helper replaces also recognized ".bmp";
// here ".bmp" ends up in the octet-stream fallback. Confirm that this is intended.
private static MIMEType DeriveMIMETypeFromExtension(string extension)
{
    var images = Builder.Create().UseImage();
    switch (extension)
    {
        case ".png":
            return images.UseSubtype(ImageSubtype.PNG).Build();

        case ".jpg":
        case ".jpeg":
            return images.UseSubtype(ImageSubtype.JPEG).Build();

        case ".gif":
            return images.UseSubtype(ImageSubtype.GIF).Build();

        case ".webp":
            return images.UseSubtype(ImageSubtype.WEBP).Build();

        case ".tiff":
        case ".tif":
            return images.UseSubtype(ImageSubtype.TIFF).Build();

        case ".heic":
        case ".heif":
            return images.UseSubtype(ImageSubtype.HEIC).Build();

        default:
            // Unknown extension: report generic binary content.
            return Builder.Create().UseApplication().UseSubtype(ApplicationSubtype.OCTET_STREAM).Build();
    }
}
63+
6764
/// <summary>
6865
/// Read the image content as a base64 string.
6966
/// </summary>
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
@using AIStudio.Settings.DataModel
2+
3+
@namespace AIStudio.Components
4+
@inherits MSGComponentBase
5+
6+
@if (PreviewFeatures.PRE_SPEECH_TO_TEXT_2026.IsEnabled(this.SettingsManager))
7+
{
8+
<MudTooltip Text="@this.Tooltip">
9+
<MudToggleIconButton Toggled="@this.isRecording"
10+
ToggledChanged="@this.OnRecordingToggled"
11+
Icon="@Icons.Material.Filled.Mic"
12+
ToggledIcon="@Icons.Material.Filled.Stop"
13+
Color="Color.Primary"
14+
ToggledColor="Color.Error"/>
15+
</MudTooltip>
16+
}
Lines changed: 197 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,197 @@
1+
using AIStudio.Tools.MIME;
2+
using AIStudio.Tools.Services;
3+
4+
using Microsoft.AspNetCore.Components;
5+
6+
namespace AIStudio.Components;
7+
8+
public partial class VoiceRecorder : MSGComponentBase
9+
{
10+
[Inject]
11+
private ILogger<VoiceRecorder> Logger { get; init; } = null!;
12+
13+
[Inject]
14+
private IJSRuntime JsRuntime { get; init; } = null!;
15+
16+
[Inject]
17+
private RustService RustService { get; init; } = null!;
18+
19+
private uint numReceivedChunks;
20+
private bool isRecording;
21+
private FileStream? currentRecordingStream;
22+
private string? currentRecordingPath;
23+
private string? currentRecordingMimeType;
24+
private DotNetObjectReference<VoiceRecorder>? dotNetReference;
25+
26+
/// <summary>
/// The localized tooltip of the toggle button; it depends on whether a recording is running.
/// </summary>
private string Tooltip
{
    get
    {
        if (this.isRecording)
            return T("Stop recording and start transcription");

        return T("Start recording your voice for a transcription");
    }
}
27+
28+
/// <summary>
/// Handles the toggle button. When toggled on, a recording file is opened and the
/// JavaScript recorder is started; when toggled off, the JavaScript recorder is
/// stopped and the recording file is finalized.
/// </summary>
/// <param name="toggled">True to start a recording, false to stop the running one.</param>
private async Task OnRecordingToggled(bool toggled)
{
    if (toggled)
    {
        var mimeTypes = GetPreferredMimeTypes(
            Builder.Create().UseAudio().UseSubtype(AudioSubtype.OGG).Build(),
            Builder.Create().UseAudio().UseSubtype(AudioSubtype.AAC).Build(),
            Builder.Create().UseAudio().UseSubtype(AudioSubtype.MP3).Build(),
            Builder.Create().UseAudio().UseSubtype(AudioSubtype.AIFF).Build(),
            Builder.Create().UseAudio().UseSubtype(AudioSubtype.WAV).Build(),
            Builder.Create().UseAudio().UseSubtype(AudioSubtype.FLAC).Build()
        );

        this.Logger.LogInformation("Starting audio recording with preferred MIME types: '{PreferredMimeTypes}'.", string.Join<MIMEType>(", ", mimeTypes));

        try
        {
            // Create a DotNetObjectReference to pass to JavaScript:
            this.dotNetReference = DotNetObjectReference.Create(this);

            // Initialize the file stream for writing chunks:
            await this.InitializeRecordingStream();

            var mimeTypeStrings = mimeTypes.ToStringArray();
            var actualMimeType = await this.JsRuntime.InvokeAsync<string>("audioRecorder.start", this.dotNetReference, mimeTypeStrings);

            // Store the MIME type for later use:
            this.currentRecordingMimeType = actualMimeType;

            this.Logger.LogInformation("Audio recording started with MIME type: '{ActualMimeType}'.", actualMimeType);
            this.isRecording = true;
        }
        catch (Exception ex)
        {
            // Starting can fail (e.g., the JavaScript call throws). Without this
            // cleanup, the opened file stream and the .NET reference would leak.
            this.Logger.LogError(ex, "Failed to start the audio recording.");
            this.isRecording = false;
            await this.FinalizeRecordingStream();
        }
    }
    else
    {
        try
        {
            var result = await this.JsRuntime.InvokeAsync<AudioRecordingResult>("audioRecorder.stop");
            if (result.ChangedMimeType)
            {
                this.Logger.LogWarning("The recorded audio MIME type was changed to '{ResultMimeType}'.", result.MimeType);

                // Use the reported MIME type so that the file extension chosen
                // during finalization matches the recorded data:
                if (!string.IsNullOrWhiteSpace(result.MimeType))
                    this.currentRecordingMimeType = result.MimeType;
            }
        }
        catch (Exception ex)
        {
            this.Logger.LogError(ex, "Failed to stop the audio recording.");
        }
        finally
        {
            // Close and finalize the recording stream, even when stopping failed,
            // so that the file handle is released and the button is reset:
            try
            {
                await this.FinalizeRecordingStream();
            }
            finally
            {
                this.isRecording = false;
                this.StateHasChanged();
            }
        }
    }
}
71+
72+
/// <summary>
/// Returns the MIME types to offer to the recorder, in order of preference.
/// </summary>
/// <param name="mimeTypes">The caller's preferred MIME types; may be empty.</param>
/// <returns>
/// The given array when it contains at least one entry; otherwise a default list
/// consisting of WEBM, OGG, MP4, and MPEG audio.
/// </returns>
private static MIMEType[] GetPreferredMimeTypes(params MIMEType[] mimeTypes)
{
    // The caller expressed a preference: use it as is.
    if (mimeTypes.Length > 0)
        return mimeTypes;

    // Nothing given: fall back to the default list.
    var builder = Builder.Create().UseAudio();
    var webm = builder.UseSubtype(AudioSubtype.WEBM).Build();
    var ogg = builder.UseSubtype(AudioSubtype.OGG).Build();
    var mp4 = builder.UseSubtype(AudioSubtype.MP4).Build();
    var mpeg = builder.UseSubtype(AudioSubtype.MPEG).Build();

    return [webm, ogg, mp4, mpeg];
}
89+
90+
/// <summary>
/// Opens a new file stream inside the "audioRecordings" folder of the data directory
/// and resets the chunk counter. The file gets a temporary ".audio" extension.
/// </summary>
private async Task InitializeRecordingStream()
{
    // A stream left over from an earlier, unfinished recording would otherwise be
    // overwritten below and its file handle would leak:
    if (this.currentRecordingStream is not null)
    {
        this.Logger.LogWarning("A previous recording stream was still open; closing it before starting a new recording.");
        await this.currentRecordingStream.DisposeAsync();
        this.currentRecordingStream = null;
    }

    this.numReceivedChunks = 0;
    var dataDirectory = await this.RustService.GetDataDirectory();
    var recordingDirectory = Path.Combine(dataDirectory, "audioRecordings");

    // CreateDirectory does nothing when the directory already exists,
    // so no prior existence check is needed:
    Directory.CreateDirectory(recordingDirectory);

    var fileName = $"recording_{DateTime.UtcNow:yyyyMMdd_HHmmss}.audio";
    this.currentRecordingPath = Path.Combine(recordingDirectory, fileName);
    this.currentRecordingStream = new FileStream(this.currentRecordingPath, FileMode.Create, FileAccess.Write, FileShare.None, bufferSize: 8192, useAsync: true);

    this.Logger.LogInformation("Initialized audio recording stream: '{RecordingPath}'.", this.currentRecordingPath);
}
104+
105+
/// <summary>
/// Callback that is invokable from JavaScript: appends one chunk of recorded audio
/// to the current recording file and flushes it right away.
/// </summary>
/// <param name="chunkBytes">The raw bytes of the audio chunk.</param>
[JSInvokable]
public async Task OnAudioChunkReceived(byte[] chunkBytes)
{
    if (this.currentRecordingStream is not null)
    {
        try
        {
            this.numReceivedChunks += 1;

            await this.currentRecordingStream.WriteAsync(chunkBytes);
            await this.currentRecordingStream.FlushAsync();

            this.Logger.LogDebug("Wrote {ByteCount} bytes to recording stream.", chunkBytes.Length);
        }
        catch (Exception ex)
        {
            // A failed write is logged only; it does not propagate to the caller.
            this.Logger.LogError(ex, "Error writing audio chunk to stream.");
        }
    }
    else
    {
        // No open stream: the chunk is dropped.
        this.Logger.LogWarning("Received audio chunk but no recording stream is active.");
    }
}
127+
128+
/// <summary>
/// Closes the current recording stream, renames the recording file so that its
/// extension matches the stored MIME type, and resets all per-recording state.
/// </summary>
/// <remarks>
/// The per-recording state (path, MIME type, .NET reference) is reset even when
/// closing or renaming the file throws; the exception itself still propagates.
/// </remarks>
private async Task FinalizeRecordingStream()
{
    try
    {
        var stream = this.currentRecordingStream;
        if (stream is null)
            return;

        try
        {
            await stream.FlushAsync();
        }
        finally
        {
            // Release the file handle even when flushing failed:
            this.currentRecordingStream = null;
            await stream.DisposeAsync();
        }

        // Rename the file with the correct extension based on MIME type:
        if (this.currentRecordingPath is null || this.currentRecordingMimeType is null)
            return;

        if (!File.Exists(this.currentRecordingPath))
            return;

        var extension = GetFileExtension(this.currentRecordingMimeType);
        var newPath = Path.ChangeExtension(this.currentRecordingPath, extension);

        File.Move(this.currentRecordingPath, newPath, overwrite: true);
        this.Logger.LogInformation("Finalized audio recording over {NumChunks} streamed audio chunks to the file '{RecordingPath}'.", this.numReceivedChunks, newPath);
    }
    finally
    {
        this.currentRecordingPath = null;
        this.currentRecordingMimeType = null;

        // Dispose the .NET reference:
        this.dotNetReference?.Dispose();
        this.dotNetReference = null;
    }
}
157+
158+
/// <summary>
/// Determines the file extension for an audio MIME type.
/// </summary>
/// <param name="mimeType">
/// The MIME type; parameters after a semicolon (e.g., "audio/webm;codecs=opus")
/// are ignored, and the comparison is case-insensitive.
/// </param>
/// <returns>
/// The extension including the leading dot, or ".audio" when the MIME type is
/// empty or not known.
/// </returns>
private static string GetFileExtension(string mimeType)
{
    const string FALLBACK = ".audio";
    if (string.IsNullOrWhiteSpace(mimeType))
        return FALLBACK;

    var baseMimeType = mimeType.Split(';')[0].Trim().ToLowerInvariant();
    return baseMimeType switch
    {
        "audio/webm" => ".webm",
        "audio/ogg" => ".ogg",
        "audio/mp4" => ".m4a",
        "audio/mpeg" or "audio/mp3" => ".mp3",
        "audio/wav" or "audio/x-wav" or "audio/wave" => ".wav",

        // The recorder is also asked for these formats, so they need a mapping as well:
        "audio/aac" => ".aac",
        "audio/flac" or "audio/x-flac" => ".flac",
        "audio/aiff" or "audio/x-aiff" => ".aiff",

        _ => FALLBACK
    };
}
172+
173+
/// <summary>
/// The result type requested from the "audioRecorder.stop" JavaScript call.
/// </summary>
private sealed class AudioRecordingResult
{
    /// <summary>
    /// Whether the MIME type of the recording was changed.
    /// </summary>
    public bool ChangedMimeType { get; init; }

    /// <summary>
    /// The MIME type reported for the recording; empty by default.
    /// </summary>
    public string MimeType { get; init; } = "";
}
179+
180+
#region Overrides of MSGComponentBase
181+
182+
/// <summary>
/// Releases a still open recording stream and the .NET object reference,
/// then lets the base class dispose its own resources.
/// </summary>
protected override void DisposeResources()
{
    // A recording may still be active when the component goes away:
    this.currentRecordingStream?.Dispose();
    this.currentRecordingStream = null;

    this.dotNetReference?.Dispose();
    this.dotNetReference = null;

    base.DisposeResources();
}
195+
196+
#endregion
197+
}

app/MindWork AI Studio/Layout/MainLayout.razor

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
11
@using AIStudio.Settings.DataModel
2+
@using AIStudio.Components
3+
24
@using Microsoft.AspNetCore.Components.Routing
35
@using MudBlazor
46

@@ -20,12 +22,20 @@
2022
</MudNavLink>
2123
}
2224
</MudNavMenu>
25+
26+
<MudSpacer/>
27+
28+
<MudStack AlignItems="AlignItems.Center">
29+
<MudToolBar WrapContent="true">
30+
<VoiceRecorder />
31+
</MudToolBar>
32+
</MudStack>
2333
</MudDrawer>
2434
</MudDrawerContainer>
2535
}
2636
else
2737
{
28-
<MudPaper Width="4em" Class="mud-height-full absolute">
38+
<MudPaper Width="4em" Class="mud-height-full absolute" Style="display: flex; flex-direction: column;">
2939
<MudNavMenu>
3040
@foreach (var navBarItem in this.navItems)
3141
{
@@ -41,6 +51,14 @@
4151
}
4252
}
4353
</MudNavMenu>
54+
55+
<MudSpacer/>
56+
57+
<MudStack AlignItems="AlignItems.Center">
58+
<MudToolBar WrapContent="true">
59+
<VoiceRecorder />
60+
</MudToolBar>
61+
</MudStack>
4462
</MudPaper>
4563
}
4664
}

0 commit comments

Comments
 (0)