Skip to content

Commit ef61b88

Browse files
committed
added silence detection
1 parent 0e00b19 commit ef61b88

File tree

5 files changed

+244
-82
lines changed

5 files changed

+244
-82
lines changed

playground/main.ts

Lines changed: 4 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -32,23 +32,14 @@ const image = await composition.add(
3232
})
3333
);
3434

35-
const track = composition.findTracks(core.VideoTrack).at(0);
35+
const audioTrack = new core.AudioTrack();
3636

37-
console.log(await track?.detectSilences());
38-
39-
const track2 = new core.VideoTrack();
40-
41-
const video_tutorial = await new core.VideoClip(await core.VideoSource
42-
.from('/tutorial.mp4'), {
37+
const audioTest = await new core.AudioClip(await core.AudioSource
38+
.from('/silences.mp3'), {
4339
volume: 0.1,
44-
anchor: 0.5,
45-
position: 'center',
46-
height: '100%',
4740
});
4841

49-
await track2.add(video_tutorial);
50-
51-
console.log("video_tutorial", await track2.detectSilences());
42+
await audioTrack.add(audioTest);
5243

5344
image.animate()
5445
.rotation(-16).to(14, 5).to(-7, 10).to(24, 7).to(-3, 9).to(19, 7).to(-14, 12).to(5, 9).to(-30, 13)

public/silences.mp3

43.1 KB
Binary file not shown.

src/tracks/media/media.spec.ts

Lines changed: 141 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,147 @@ import { Composition } from '../../composition';
1010
import { MediaClip } from '../../clips';
1111
import { Timestamp } from '../../models';
1212
import { MediaTrack } from './media';
13+
import { AudioSource } from '../../sources';
14+
import { getSilenceArrayBuffer } from './media.utils';
15+
16+
17+
// Mocking the OfflineAudioContext class for silence detection
18+
class MockSilenceAudioContext {
19+
constructor(public numberOfChannels: number, public length: number, public sampleRate: number) { }
20+
21+
decodeAudioData(_: ArrayBuffer): Promise<AudioBuffer> {
22+
const audioBuffer = {
23+
duration: 5, // Mock duration
24+
sampleRate: 1000,
25+
length: 5000,
26+
getChannelData: () => new Float32Array(5000).fill(0.5), // Return a dummy Float32Array
27+
} as any as AudioBuffer;
28+
return Promise.resolve(audioBuffer);
29+
}
30+
31+
close() {
32+
return Promise.resolve();
33+
}
34+
}
35+
36+
vi.stubGlobal('AudioContext', MockSilenceAudioContext); // Stub the global AudioContext used by silence detection
37+
38+
describe('Get silence array buffer', () => {
39+
it('should get silence array buffer', async () => {
40+
const audioBuffer = {
41+
duration: 5, // Mock duration
42+
sampleRate: 44100,
43+
length: 5 * 44100,
44+
getChannelData: () => {
45+
const totalLength = 5 * 44100;
46+
return Float32Array.from({length: totalLength}, (_, i) => {
47+
if (i < 2 * 44100) {
48+
return 1;
49+
}
50+
else if (i >= 3 * 44100) {
51+
return -1;
52+
}
53+
return 0;
54+
});
55+
},
56+
} as any as AudioBuffer;
57+
const silences = getSilenceArrayBuffer(audioBuffer, 44100, 1, -50, 0);
58+
expect(silences).toEqual([{
59+
start: 0,
60+
stop: 2
61+
},
62+
{
63+
start: 3,
64+
stop: 5
65+
}]);
66+
});
67+
68+
it('no silence in getSilenceArrayBuffer', () => {
69+
const audioBuffer = {
70+
duration: 5, // Mock duration
71+
sampleRate: 44100,
72+
length: 5 * 44100,
73+
getChannelData: (i: number) => new Float32Array(5 * 44100).fill(0),
74+
} as any as AudioBuffer;
75+
const silences = getSilenceArrayBuffer(audioBuffer, 1024, 1, -50, 0);
76+
expect(silences).toEqual([]);
77+
});
78+
79+
it('only silence in getSilenceArrayBuffer', () => {
80+
const audioBuffer = {
81+
duration: 5, // Mock duration
82+
sampleRate: 44100,
83+
length: 5 * 44100,
84+
getChannelData: () => new Float32Array(5 * 44100).fill(1),
85+
} as any as AudioBuffer;
86+
const silences = getSilenceArrayBuffer(audioBuffer, 1024, 1, -50, 0);
87+
expect(silences).toEqual([{
88+
start: 0,
89+
stop: 5
90+
}]);
91+
});
92+
93+
it('should throw error if no sample rate', () => {
94+
const audioBuffer = {
95+
sampleRate: undefined,
96+
} as any as AudioBuffer;
97+
expect(() => getSilenceArrayBuffer(audioBuffer, 1024, 1, -50, 0)).toThrow();
98+
});
99+
});
100+
101+
102+
describe('Find silences in a track', () => {
103+
let comp: Composition;
104+
let track: MediaTrack<MediaClip>;
105+
let file: File;
106+
const updateMock = vi.fn();
107+
108+
beforeEach(() => {
109+
// frame and seconds are the same
110+
comp = new Composition();
111+
file = new File([], "test.mp3");
112+
track = comp.shiftTrack(new MediaTrack<MediaClip>());
113+
track.on('update', updateMock);
114+
});
115+
116+
it('empty track should have no silences', async () => {
117+
const emptyTrack = new MediaTrack();
118+
const silences = await track.detectSilences();
119+
expect(silences).toEqual([]);
120+
});
121+
122+
it('track with clip but no element should have no silences', async () => {
123+
const clip = new MediaClip();
124+
clip.source = await AudioSource.from(file);
125+
await track.add(clip);
126+
const silences = await track.detectSilences();
127+
expect(silences).toEqual([]);
128+
});
129+
130+
it('track with clip and element should find silences', async () => {
131+
const clip = new MediaClip();
132+
clip.source = await AudioSource.from(file);
133+
clip.element = new Audio();
134+
clip.duration.seconds = 5;
135+
const clip2 = new MediaClip();
136+
clip2.source = await AudioSource.from(file);
137+
clip2.element = new Audio();
138+
clip2.duration.seconds = 5;
139+
clip2.offset.seconds = 5;
140+
await track.add(clip);
141+
await track.add(clip2);
142+
143+
const silences = await track.detectSilences();
144+
expect(silences).toEqual([{
145+
start: 0,
146+
stop: 5
147+
},
148+
{
149+
start: 5.001,
150+
stop: 10.001000000000001
151+
}]);
152+
});
153+
});
13154

14155
describe('The Media Track Object', () => {
15156
let comp: Composition;

src/tracks/media/media.ts

Lines changed: 8 additions & 69 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ import { Track } from '../track';
99

1010
import type { MediaClip } from '../../clips';
1111
import type { Timestamp } from '../../models';
12+
import { getSilenceArrayBuffer } from './media.utils';
1213

1314
export class MediaTrack<Clip extends MediaClip> extends Track<MediaClip> {
1415
public clips: Clip[] = [];
@@ -20,6 +21,9 @@ export class MediaTrack<Clip extends MediaClip> extends Track<MediaClip> {
2021

2122
/**
2223
* Detect periods of silence across all clips in the track
24+
*
25+
* This currently only searches for silences in each clip individually
26+
*
2327
* @param subSample Number of samples to skip when analyzing audio (higher = faster but less accurate)
2428
* @param silenceThreshold Volume threshold in dB below which is considered silence
2529
* @param minSilenceDuration Minimum duration in seconds for a silence period to be included
@@ -41,79 +45,14 @@ export class MediaTrack<Clip extends MediaClip> extends Track<MediaClip> {
4145

4246
// Get audio data for this clip
4347
const arrayBuffer = await (await clip.source.getFile()).arrayBuffer();
44-
const audioBuffer = await audioContext.decodeAudioData(arrayBuffer);
45-
const channelData = audioBuffer.getChannelData(0);
48+
const audioBuffer = await audioContext.decodeAudioData(arrayBuffer);
4649

47-
// Process samples in chunks
48-
const sampleSize = subSample;
49-
const numChunks = Math.floor(channelData.length / sampleSize);
50-
const minSilenceChunks = Math.floor(minSilenceDuration * audioBuffer.sampleRate / subSample);
50+
silences.push(...getSilenceArrayBuffer(audioBuffer, subSample, minSilenceDuration, silenceThreshold, clip.start.seconds));
5151

52-
const decibelValues: number[] = [];
53-
54-
for (let i = 0; i < numChunks; i++) {
55-
const chunk = channelData.slice(i * sampleSize, (i + 1) * sampleSize);
56-
const rms = Math.sqrt(chunk.reduce((sum, val) => sum + val * val, 0) / chunk.length);
57-
const db = 20 * Math.log10(Math.max(rms, 1e-10));
58-
decibelValues.push(db);
59-
}
60-
61-
// Find silence periods in this clip
62-
let silenceStart: number | null = null;
63-
64-
for (let i = 0; i < decibelValues.length; i++) {
65-
if (decibelValues[i] < silenceThreshold) {
66-
if (silenceStart === null) {
67-
silenceStart = i;
68-
}
69-
} else if (silenceStart !== null) {
70-
const silenceDuration = i - silenceStart;
71-
if (silenceDuration >= minSilenceChunks) {
72-
// Convert chunk indices to seconds and adjust for clip offset
73-
const silenceStartTime = (silenceStart * sampleSize) / audioBuffer.sampleRate;
74-
const silenceStopTime = (i * sampleSize) / audioBuffer.sampleRate;
75-
76-
silences.push({
77-
start: silenceStartTime + clip.start.seconds,
78-
stop: silenceStopTime + clip.start.seconds
79-
});
80-
}
81-
silenceStart = null;
82-
}
83-
}
84-
85-
// Handle silence at end of clip
86-
if (silenceStart !== null) {
87-
const silenceDuration = decibelValues.length - silenceStart;
88-
if (silenceDuration >= minSilenceChunks || silenceDuration == decibelValues.length) {
89-
silences.push({
90-
start: (silenceStart * sampleSize) / audioBuffer.sampleRate + clip.start.seconds,
91-
stop: audioBuffer.duration + clip.start.seconds
92-
});
93-
}
94-
}
9552
}
9653

9754
await audioContext.close();
9855

99-
// Sort silences by start time and merge overlapping periods
100-
silences.sort((a, b) => a.start - b.start);
101-
const mergedSilences: typeof silences = [];
102-
103-
for (const silence of silences) {
104-
if (mergedSilences.length === 0) {
105-
mergedSilences.push(silence);
106-
continue;
107-
}
108-
109-
const lastSilence = mergedSilences[mergedSilences.length - 1];
110-
if (silence.start <= lastSilence.stop) {
111-
// Merge overlapping silence periods
112-
lastSilence.stop = Math.max(lastSilence.stop, silence.stop);
113-
} else {
114-
mergedSilences.push(silence);
115-
}
116-
}
117-
118-
return mergedSilences;
56+
return silences.sort((a, b) => a.start - b.start);
11957
}
58+
}

src/tracks/media/media.utils.ts

Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,91 @@
1+
/**
2+
* Get the array buffer for silence detection
3+
*
4+
* @param audioBuffer - The audio buffer to process
5+
* @param subSample - The number of samples to process at a time
6+
* @param minSilenceDuration - The minimum duration of silence to detect
7+
* @param silenceThreshold - The threshold for silence detection (in decibels < 0)
8+
* @param offsetSeconds - The offset in seconds to apply to the detected silences
9+
*/
10+
export function getSilenceArrayBuffer(
11+
audioBuffer: AudioBuffer,
12+
subSample: number,
13+
minSilenceDuration: number,
14+
silenceThreshold: number,
15+
offsetSeconds: number
16+
) {
17+
if (audioBuffer.sampleRate === undefined) {
18+
throw new Error("Audio buffer has no sample rate");
19+
}
20+
21+
const channelData = audioBuffer.getChannelData(0);
22+
console.log("max", channelData.reduce((max, val) => Math.max(max, val), 0));
23+
console.log("min", channelData.reduce((min, val) => Math.min(min, val), 0));
24+
25+
// Process samples in chunks
26+
const sampleSize = subSample;
27+
const numChunks = Math.floor(channelData.length / sampleSize);
28+
const minSilenceChunks = Math.floor(
29+
(minSilenceDuration * audioBuffer.sampleRate) / subSample
30+
);
31+
32+
const decibelValues: number[] = [];
33+
const silences: { start: number; stop: number }[] = [];
34+
35+
for (let i = 0; i < numChunks; i++) {
36+
const chunk = channelData.slice(i * sampleSize, (i + 1) * sampleSize);
37+
const rms = Math.sqrt(
38+
chunk.reduce((sum, val) => sum + val * val, 0) / chunk.length
39+
);
40+
const db = 20 * Math.log10(Math.max(rms, 1e-10));
41+
decibelValues.push(db);
42+
}
43+
44+
console.log("decibel", decibelValues);
45+
46+
// Find silence periods in this clip
47+
let silenceStart: number | null = null;
48+
49+
for (let i = 0; i < decibelValues.length; i++) {
50+
if (decibelValues[i] > silenceThreshold) {
51+
if (silenceStart === null) {
52+
silenceStart = i;
53+
}
54+
} else if (silenceStart !== null) {
55+
const silenceDuration = i - silenceStart;
56+
if (silenceDuration >= minSilenceChunks) {
57+
// Convert chunk indices to seconds and adjust for clip offset
58+
const silenceStartTime =
59+
(silenceStart * sampleSize) / audioBuffer.sampleRate;
60+
const silenceStopTime =
61+
(i * sampleSize) / audioBuffer.sampleRate;
62+
63+
silences.push({
64+
start: silenceStartTime + offsetSeconds,
65+
stop: silenceStopTime + offsetSeconds,
66+
});
67+
}
68+
silenceStart = null;
69+
}
70+
}
71+
72+
// Handle silence at end of clip
73+
if (silenceStart !== null) {
74+
const silenceDuration = decibelValues.length - silenceStart;
75+
if (
76+
silenceDuration >= minSilenceChunks ||
77+
silenceDuration == decibelValues.length
78+
) {
79+
silences.push({
80+
start:
81+
(silenceStart * sampleSize) / audioBuffer.sampleRate +
82+
offsetSeconds,
83+
stop:
84+
(channelData.length) / audioBuffer.sampleRate +
85+
offsetSeconds,
86+
});
87+
}
88+
}
89+
90+
return silences;
91+
}

0 commit comments

Comments
 (0)