Skip to content

Commit 998002a

Browse files
committed
refactor: fixing FFT Avx & simplifying VAD.
- Replace Ko-fi with AirTM for donations (since PayPal is unavailable in my region). - Simplify Recorder and VAD components - Remove VAD integration from Recorder component. - Simplify VoiceActivityDetector for basic energy detection. - Add modifier/analyzer management to Recorder. feat(experimental): Introduce advanced VAD - Add experimental VoiceActivityDetector with noise gate to Experimental namespace. - Implement combined detection mode in advanced VAD. - Add configuration options for advanced VAD. - Move configurePlayer invocation after adding component to mixer to avoid potential issues. - Move NoiseReductionModifier to Experimental namespace. - Indicate it's under development and not stable. - Introduce MathHelper.IsPowerOfTwo for power of two check. - Add MathHelper.Lerp for linear interpolation. - Optimize FFT algorithm by adding scalar implementation for smaller sizes. - Improve complex multiplication using AVX for performance.
1 parent a50ad7f commit 998002a

File tree

11 files changed

+923
-385
lines changed

11 files changed

+923
-385
lines changed

.github/FUNDING.yml

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
11
# These are supported funding model platforms
22

3-
github: [LSXPrime]
4-
ko_fi: lsxprime
5-
custom: [https://app.binance.com/qr/dplk0837ff4256a64749a2b10dfe3ea5a0b9]
3+
github: LSXPrime
4+
custom: ["https://airtm.me/lsxprime", "https://app.binance.com/qr/dplk0837ff4256a64749a2b10dfe3ea5a0b9"]

.github/workflows/greetings.yml

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
name: Greetings
2+
3+
on: pull_request_target
4+
5+
jobs:
6+
greeting:
7+
runs-on: ubuntu-latest
8+
permissions:
9+
pull-requests: write
10+
steps:
11+
- uses: actions/first-interaction@v1
12+
with:
13+
repo-token: ${{ secrets.GITHUB_TOKEN }}
14+
pr-message: |
15+
Welcome to the SoundFlow repository! We appreciate you taking the time to contribute.
16+
17+
We're excited to review your pull request and look forward to collaborating with you. Please let us know if you have any questions or need any assistance.
18+
19+
Thank you for your contribution!

.github/workflows/stale.yml

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
# This workflow warns and then closes issues and PRs that have had no activity for a specified amount of time.
2+
#
3+
# You can adjust the behavior by modifying this file.
4+
# For more information, see:
5+
# https://github.com/actions/stale
6+
name: Mark stale issues and pull requests
7+
8+
on:
9+
schedule:
10+
- cron: '44 8 * * *'
11+
12+
jobs:
13+
stale:
14+
15+
runs-on: ubuntu-latest
16+
permissions:
17+
issues: write
18+
pull-requests: write
19+
20+
steps:
21+
- uses: actions/stale@v5
22+
with:
23+
repo-token: ${{ secrets.GITHUB_TOKEN }}
24+
days-before-stale: 14
25+
days-before-close: 7
26+
stale-issue-message: |
27+
This issue has been automatically marked as stale because it has not had recent activity.
28+
It will be closed if no further activity occurs.
29+
30+
If this issue is still relevant, please leave a comment indicating that you would like it to remain open.
31+
Thank you for your contributions.
32+
stale-pr-message: |
33+
This pull request has been automatically marked as stale because it has not had recent activity.
34+
It will be closed if no further activity occurs.
35+
36+
If you are still working on this pull request, please leave a comment indicating that you would like it to remain open.
37+
Thank you for your contributions.
38+
stale-issue-label: 'no-issue-activity'
39+
stale-pr-label: 'no-pr-activity'

README.md

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -147,9 +147,10 @@ Beyond equipment, your contributions, no matter the size, help to:
147147

148148
You can directly support SoundFlow and help me get essential headphones through:
149149

150-
* **Ko-fi:** For simple one-time donations or flexible recurring support.
150+
* **AirTM:** For simple one-time donations with various payment options like Direct Bank Transfer (ACH), Debit / Credit Card via Moonpay, Stablecoins, and more than 500 banks and e-wallets.
151+
152+
[Donate using AirTM](https://airtm.me/lsxprime)
151153

152-
[![ko-fi](https://img.shields.io/badge/Ko--fi-F16061?style=for-the-badge&logo=ko-fi&logoColor=white)](https://ko-fi.com/lsxprime)
153154

154155
* **Binance Pay (Crypto - Preferred):** Support with cryptocurrency via Binance Pay.
155156

Samples/SoundFlow.SimplePlayer/ComponentTests.cs

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,11 @@
22
using SoundFlow.Backends.MiniAudio;
33
using SoundFlow.Components;
44
using SoundFlow.Enums;
5+
using SoundFlow.Experimental;
56
using SoundFlow.Modifiers;
67
using SoundFlow.Providers;
78
using SoundFlow.Visualization;
9+
using VoiceActivityDetector = SoundFlow.Components.VoiceActivityDetector;
810

911
namespace SoundFlow.SimplePlayer;
1012

@@ -169,7 +171,7 @@ private static void TestVoiceActivityDetector()
169171

170172
var microphoneProvider = new MicrophoneDataProvider();
171173
var soundPlayer = new SoundPlayer(microphoneProvider); // Play microphone input
172-
soundPlayer.ConnectInput(vad); // VAD connected to microphone input
174+
soundPlayer.AddAnalyzer(vad); // VAD connected to microphone input
173175
microphoneProvider.StartCapture();
174176
soundPlayer.Play();
175177

@@ -186,7 +188,7 @@ private static void TestVoiceActivityDetector()
186188
// Ignore: soundPlayer throws an exception here because it seeks to 0 on stop, but MicrophoneDataProvider doesn't support seeking
187189
}
188190

189-
soundPlayer.DisconnectInput(vad);
191+
soundPlayer.RemoveAnalyzer(vad);
190192
microphoneProvider.Dispose();
191193

192194
// Reinitialize audio engine for playback

Samples/SoundFlow.SimplePlayer/Program.cs

Lines changed: 9 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -3,10 +3,12 @@
33
using SoundFlow.Backends.MiniAudio;
44
using SoundFlow.Components;
55
using SoundFlow.Enums;
6+
using SoundFlow.Experimental;
67
using SoundFlow.Interfaces;
78
using SoundFlow.Modifiers;
89
using SoundFlow.Providers;
910
using SoundFlow.Visualization;
11+
using VoiceActivityDetector = SoundFlow.Components.VoiceActivityDetector;
1012

1113
namespace SoundFlow.SimplePlayer;
1214

@@ -101,7 +103,6 @@ private static void PlayAudio(ISoundDataProvider dataProvider, bool isSurround =
101103
SetOrCreateEngine();
102104
ISoundPlayer soundPlayer = isSurround ? new SurroundPlayer(dataProvider) : new SoundPlayer(dataProvider);
103105
SoundComponent component = isSurround ? (SurroundPlayer)soundPlayer : (SoundPlayer)soundPlayer;
104-
configurePlayer?.Invoke(soundPlayer);
105106

106107
if (modifiers != null)
107108
{
@@ -112,6 +113,8 @@ private static void PlayAudio(ISoundDataProvider dataProvider, bool isSurround =
112113
}
113114

114115
Mixer.Master.AddComponent(component);
116+
configurePlayer?.Invoke(soundPlayer);
117+
115118
soundPlayer.Play();
116119

117120
PlaybackControls(soundPlayer);
@@ -268,23 +271,13 @@ private static void MixedRecordAndPlayback()
268271
private static void RecordAndPlaybackAudio()
269272
{
270273
SetOrCreateEngine(Capability.Record, 48000);
271-
272-
var vad = new VoiceActivityDetector(
273-
fftSize: 1024,
274-
minHangoverFrames: 65,
275-
maxHangoverFrames: 100,
276-
minAttackFrames: 1,
277-
maxAttackFrames: 5,
278-
alpha: 0.95f,
279-
spectralCentroidThreshold: 0.45f,
280-
spectralFlatnessThreshold: 0.5f,
281-
spectralFluxThreshold: 0.12f,
282-
energyThreshold: 0.0002f
283-
);
274+
275+
var vad = new VoiceActivityDetector(1024, 3f);
284276

285277
vad.SpeechDetected += isDetected => Console.WriteLine("Speech detected: " + isDetected);
286278

287-
using var recorder = new Recorder(RecordedFilePath, SampleFormat.F32, EncodingFormat.Wav, 48000, 2, vad);
279+
using var recorder = new Recorder(RecordedFilePath, SampleFormat.F32, EncodingFormat.Wav, 48000);
280+
recorder.AddAnalyzer(vad);
288281

289282
Console.WriteLine("Recording started. Press 's' to stop, 'p' to pause/resume.");
290283
recorder.StartRecording();
@@ -371,27 +364,14 @@ private static void PlayAudioWithNoiseReduction()
371364
Console.WriteLine("File not found.");
372365
return;
373366
}
374-
375-
var vad = new VoiceActivityDetector(
376-
fftSize: 512,
377-
minHangoverFrames: 10,
378-
maxHangoverFrames: 30,
379-
minAttackFrames: 1,
380-
maxAttackFrames: 5,
381-
alpha: 0.95f,
382-
spectralCentroidThreshold: 0.45f,
383-
spectralFlatnessThreshold: 0.5f,
384-
spectralFluxThreshold: 0.12f,
385-
energyThreshold: 0.0002f
386-
);
367+
387368

388369
var noiseReductionModifier = new NoiseReductionModifier(
389370
fftSize: 2048,
390371
alpha: 3f,
391372
beta: 0.001f,
392373
gain: 1.2f,
393374
noiseFrames: 50
394-
// vad: vad
395375
);
396376

397377
PlayAudio(new StreamDataProvider(new FileStream(noisyFilePath, FileMode.Open, FileAccess.Read)), modifiers: new List<SoundModifier> { noiseReductionModifier });

Src/Components/Recorder.cs

Lines changed: 72 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -2,12 +2,13 @@
22
using SoundFlow.Enums;
33
using SoundFlow.Interfaces;
44
using SoundFlow.Exceptions;
5+
using System.Collections.ObjectModel;
56

67
namespace SoundFlow.Components;
78

89
/// <summary>
910
/// Component for recording audio data, either to a file or via a callback.
10-
/// Supports various sample and encoding formats and can integrate with a Voice Activity Detector (VAD).
11+
/// Supports various sample and encoding formats and can integrate with <see cref="SoundModifier"/> and <see cref="AudioAnalyzer"/> components for real-time processing and analysis during recording.
1112
/// Implements the <see cref="IDisposable"/> interface to ensure resources are released properly.
1213
/// </summary>
1314
public class Recorder : IDisposable
@@ -50,7 +51,8 @@ public class Recorder : IDisposable
5051
public AudioProcessCallback? ProcessCallback;
5152

5253
private ISoundEncoder? _encoder;
53-
private readonly VoiceActivityDetector? _vad;
54+
private readonly List<SoundModifier> _modifiers = [];
55+
private readonly List<AudioAnalyzer> _analyzers = [];
5456

5557
/// <summary>
5658
/// Initializes a new instance of the <see cref="Recorder"/> class to record audio to a file.
@@ -60,15 +62,12 @@ public class Recorder : IDisposable
6062
/// <param name="encodingFormat">The desired encoding format for the recorded audio file. Defaults to <see cref="EncodingFormat.Wav"/>.</param>
6163
/// <param name="sampleRate">The desired sample rate for recording, in samples per second. Defaults to 44100 Hz.</param>
6264
/// <param name="channels">The number of channels to record. Defaults to 2 (stereo).</param>
63-
/// <param name="vad">An optional <see cref="VoiceActivityDetector"/> to use for voice activity detection during recording. Defaults to null.</param>
6465
public Recorder(string filePath,
6566
SampleFormat sampleFormat = SampleFormat.F32,
6667
EncodingFormat encodingFormat = EncodingFormat.Wav,
6768
int sampleRate = 44100,
68-
int channels = 2,
69-
VoiceActivityDetector? vad = null)
69+
int channels = 2)
7070
{
71-
_vad = vad;
7271
SampleFormat = sampleFormat;
7372
EncodingFormat = encodingFormat;
7473
FilePath = filePath;
@@ -84,25 +83,32 @@ public Recorder(string filePath,
8483
/// <param name="encodingFormat">The encoding format (primarily for internal use or if an encoder is manually managed). Defaults to <see cref="EncodingFormat.Wav"/>.</param>
8584
/// <param name="sampleRate">The desired sample rate for recording, in samples per second. Defaults to 44100 Hz.</param>
8685
/// <param name="channels">The number of channels to record. Defaults to 2 (stereo).</param>
87-
/// <param name="vad">An optional <see cref="VoiceActivityDetector"/> to use for voice activity detection during recording. Defaults to null.</param>
8886
public Recorder(AudioProcessCallback callback,
8987
SampleFormat sampleFormat = SampleFormat.F32,
9088
EncodingFormat encodingFormat = EncodingFormat.Wav,
9189
int sampleRate = 44100,
92-
int channels = 2,
93-
VoiceActivityDetector? vad = null)
90+
int channels = 2)
9491
{
9592
ProcessCallback = callback;
96-
_vad = vad;
9793
SampleFormat = sampleFormat;
9894
EncodingFormat = encodingFormat;
9995
SampleRate = sampleRate;
10096
Channels = channels;
10197
}
10298

99+
/// <summary>
100+
/// Gets a read-only list of <see cref="SoundModifier"/> components applied to the recorder.
101+
/// </summary>
102+
public ReadOnlyCollection<SoundModifier> Modifiers => _modifiers.AsReadOnly();
103+
104+
/// <summary>
105+
/// Gets a read-only list of <see cref="AudioAnalyzer"/> components applied to the recorder.
106+
/// </summary>
107+
public ReadOnlyCollection<AudioAnalyzer> Analyzers => _analyzers.AsReadOnly();
108+
103109
/// <summary>
104110
/// Starts the audio recording process.
105-
/// If recording to a file, it initializes the audio encoder. If using a VAD, it starts monitoring for voice activity.
111+
/// If recording to a file, it initializes the audio encoder.
106112
/// </summary>
107113
/// <exception cref="ArgumentException">Thrown if both <see cref="FilePath"/> and <see cref="ProcessCallback"/> are invalid (e.g., <see cref="FilePath"/> is null or empty and <see cref="ProcessCallback"/> is null).</exception>
108114
/// <exception cref="BackendException">Thrown if creating the audio encoder fails when recording to a file.</exception>
@@ -124,17 +130,6 @@ public void StartRecording()
124130

125131
AudioEngine.OnAudioProcessed += OnOnAudioProcessed;
126132
State = PlaybackState.Playing;
127-
128-
if (_vad != null)
129-
{
130-
_vad.SpeechDetected += isDetected =>
131-
{
132-
if (isDetected)
133-
ResumeRecording();
134-
else
135-
PauseRecording();
136-
};
137-
}
138133
}
139134

140135
/// <summary>
@@ -179,19 +174,70 @@ public void StopRecording()
179174
State = PlaybackState.Stopped;
180175
}
181176

177+
/// <summary>
178+
/// Adds a <see cref="SoundModifier"/> to the recording pipeline.
179+
/// Modifiers are applied to the audio data before encoding or processing via callback.
180+
/// </summary>
181+
/// <param name="modifier">The modifier to add.</param>
182+
public void AddModifier(SoundModifier modifier)
183+
{
184+
_modifiers.Add(modifier);
185+
}
186+
187+
/// <summary>
188+
/// Removes a <see cref="SoundModifier"/> from the recording pipeline.
189+
/// </summary>
190+
/// <param name="modifier">The modifier to remove.</param>
191+
public void RemoveModifier(SoundModifier modifier)
192+
{
193+
_modifiers.Remove(modifier);
194+
}
195+
196+
/// <summary>
197+
/// Adds an <see cref="AudioAnalyzer"/> to the recording pipeline.
198+
/// Analyzers can be used to process and extract data from the audio during recording.
199+
/// </summary>
200+
/// <param name="analyzer">The analyzer to add.</param>
201+
public void AddAnalyzer(AudioAnalyzer analyzer)
202+
{
203+
_analyzers.Add(analyzer);
204+
}
205+
206+
/// <summary>
207+
/// Removes an <see cref="AudioAnalyzer"/> from the recording pipeline.
208+
/// </summary>
209+
/// <param name="analyzer">The analyzer to remove.</param>
210+
public void RemoveAnalyzer(AudioAnalyzer analyzer)
211+
{
212+
_analyzers.Remove(analyzer);
213+
}
214+
182215
/// <summary>
183216
/// Handles the audio processed event from the audio engine.
184217
/// This method is invoked by the audio engine when new audio samples are available.
185-
/// It processes the samples through the VAD (if enabled), checks the current state, invokes the <see cref="ProcessCallback"/> (if set), and encodes the samples using the <see cref="_encoder"/> (if recording to a file).
218+
/// It first checks the current state and returns immediately unless recording is active; otherwise it runs the samples through each enabled <see cref="SoundModifier"/>, then through each <see cref="AudioAnalyzer"/>, invokes the <see cref="ProcessCallback"/> (if set), and encodes the samples using the <see cref="_encoder"/> (if recording to a file).
186219
/// </summary>
187220
/// <param name="samples">A span containing the audio samples to process.</param>
188221
/// <param name="capability">The audio capability associated with the processed samples (e.g., input or output).</param>
189222
private void OnOnAudioProcessed(Span<float> samples, Capability capability)
190223
{
191-
_vad?.Process(samples);
192224
if (State != PlaybackState.Playing)
193225
return;
194226

227+
// Apply modifiers
228+
foreach (var modifier in _modifiers)
229+
{
230+
if (modifier.Enabled)
231+
modifier.Process(samples);
232+
}
233+
234+
// Process analyzers
235+
foreach (var analyzer in _analyzers)
236+
{
237+
analyzer.Process(samples);
238+
}
239+
240+
// Pass samples
195241
ProcessCallback?.Invoke(samples, capability);
196242
_encoder?.Encode(samples);
197243
}
@@ -202,6 +248,8 @@ public void Dispose()
202248
StopRecording();
203249
AudioEngine.OnAudioProcessed -= OnOnAudioProcessed;
204250
ProcessCallback = null;
251+
_modifiers.Clear();
252+
_analyzers.Clear();
205253
GC.SuppressFinalize(this);
206254
}
207255
}

0 commit comments

Comments
 (0)