diffusionstudio
diff --git a/‎playground/main.ts‎
Lines changed: 11 additions & 0 deletions b/‎playground/main.ts‎
Lines changed: 11 additions & 0 deletions
diff --git a/‎public/harvard.MP3‎
367 KB b/‎public/harvard.MP3‎
367 KB
diff --git a/‎public/silences.mp3‎
43.1 KB b/‎public/silences.mp3‎
43.1 KB
diff --git a/‎src/clips/media/media.ts‎
Lines changed: 17 additions & 15 deletions b/‎src/clips/media/media.ts‎
Lines changed: 17 additions & 15 deletions
diff --git a/‎src/clips/video/video.ts‎
Lines changed: 5 additions & 5 deletions b/‎src/clips/video/video.ts‎
Lines changed: 5 additions & 5 deletions
diff --git a/‎src/sources/audio.spec.ts‎
Lines changed: 62 additions & 0 deletions b/‎src/sources/audio.spec.ts‎
Lines changed: 62 additions & 0 deletions
diff --git a/‎src/sources/audio.ts‎
Lines changed: 40 additions & 8 deletions b/‎src/sources/audio.ts‎
Lines changed: 40 additions & 8 deletions
diff --git a/‎src/sources/audio.types.ts‎
Lines changed: 31 additions & 16 deletions b/‎src/sources/audio.types.ts‎
Lines changed: 31 additions & 16 deletions
@@ -42,6 +42,16 @@ const image = await composition.add(
   })
 );
 
+const audioTrack = composition.createTrack('audio').stacked(true); // dedicated stacked audio track for the silence-removal demo
+const audioSource = await core.AudioSource.from('/harvard.MP3');
+await audioTrack.add(
+  new core.AudioClip(audioSource) // plain constructor call; awaiting the new instance was redundant
+);
+await audioTrack.removeSilences({
+  minDuration: 300,
+  windowSize: 1,
+});
+
 image.animate()
   .rotation(-16).to(14, 5).to(-7, 10).to(24, 7).to(-3, 9).to(19, 7).to(-14, 12).to(5, 9).to(-30, 13)
   .translateX(1700, 0, 'easeOut').to(-1400, 40)
@@ -61,6 +71,7 @@ await composition.add(
 
 (await composition.add(
   new core.AudioClip(await core.AudioSource.from('/audio.mp3'), {
+    muted: true,
     transcript: core.Transcript.fromJSON(captions).optimize(),
   })
 )).addCaptions();
 
@@ -1,14 +1,14 @@
 /**
  * Copyright (c) 2024 The Diffusion Studio Authors
  *
- * This Source Code Form is subject to the terms of the Mozilla 
+ * This Source Code Form is subject to the terms of the Mozilla
  * Public License, v. 2.0 that can be found in the LICENSE file.
  */
 
 import { Timestamp, Transcript } from '../../models';
 import { AudioSource } from '../../sources';
 import { RangeDeserializer } from './media.deserializer';
-import { serializable, } from '../../services';
+import { serializable } from '../../services';
 import { replaceKeyframes } from '../clip/clip.utils';
 import { ReferenceError, ValidationError } from '../../errors';
 import { Clip } from '../clip';
@@ -17,7 +17,6 @@ import type { CaptionPresetStrategy, CaptionTrack } from '../../tracks';
 import type { float, frame } from '../../types';
 import type { MediaClipProps } from './media.interfaces';
 
-
 export class MediaClip<Props extends MediaClipProps = MediaClipProps> extends Clip<MediaClipProps> {
 	public source = new AudioSource();
 	public declare element?: HTMLAudioElement | HTMLVideoElement;
@@ -54,7 +53,7 @@ export class MediaClip<Props extends MediaClipProps = MediaClipProps> extends Cl
 	@serializable(Transcript)
 	public get transcript(): Transcript | undefined {
 		return this.source.transcript;
-	};
+	}
 
 	public set transcript(transcript: Transcript | undefined) {
 		this.source.transcript = transcript;
@@ -170,10 +169,12 @@ export class MediaClip<Props extends MediaClipProps = MediaClipProps> extends Cl
 	public seek(time: Timestamp): Promise<void> {
 		return new Promise((resolve, reject) => {
 			if (!this.element) {
-				return reject(new ReferenceError({
-					code: 'elementNotDefined',
-					message: 'Cannot seek on undefined element',
-				}));
+				return reject(
+					new ReferenceError({
+						code: 'elementNotDefined',
+						message: 'Cannot seek on undefined element',
+					}),
+				);
 			}
 			if (time.millis < this.start.millis || time.millis > this.stop.millis) {
 				time = this.start;
@@ -205,7 +206,7 @@ export class MediaClip<Props extends MediaClipProps = MediaClipProps> extends Cl
 		if (start.millis >= stop.millis) {
 			throw new ValidationError({
 				code: 'invalidKeyframe',
-				message: "Start can't lower than or equal the stop"
+				message: "Start can't lower than or equal the stop",
 			});
 		}
 		// start and/or stop are out of bounds
@@ -285,18 +286,17 @@ export class MediaClip<Props extends MediaClipProps = MediaClipProps> extends Cl
 	 * Generates a new caption track for the current clip using the specified captioning strategy.
 	 * @param strategy An optional CaptionPresetStrategy to define how captions should be generated.
 	 */
-	public async addCaptions(strategy?: CaptionPresetStrategy | (new () => CaptionPresetStrategy)): Promise<CaptionTrack> {
+	public async addCaptions(
+		strategy?: CaptionPresetStrategy | (new () => CaptionPresetStrategy),
+	): Promise<CaptionTrack> {
 		if (!this.track?.composition) {
 			throw new ValidationError({
 				code: 'compositionNotDefined',
 				message: 'Captions can only be generated after the clip has been added to the composition',
 			});
 		}
 
-		const track = await this.track.composition
-			.createTrack('caption')
-			.from(this)
-			.generate(strategy);
+		const track = await this.track.composition.createTrack('caption').from(this).generate(strategy);
 
 		return track;
 	}
@@ -308,7 +308,9 @@ export class MediaClip<Props extends MediaClipProps = MediaClipProps> extends Cl
 	/**
 	 * @deprecated use `addCaptions` instead
 	 */
-	public async generateCaptions(strategy?: CaptionPresetStrategy | (new () => CaptionPresetStrategy)) {
+	public async generateCaptions(
+		strategy?: CaptionPresetStrategy | (new () => CaptionPresetStrategy),
+	) {
 		return this.addCaptions(strategy);
 	}
 }
@@ -1,7 +1,7 @@
 /**
  * Copyright (c) 2024 The Diffusion Studio Authors
  *
- * This Source Code Form is subject to the terms of the Mozilla 
+ * This Source Code Form is subject to the terms of the Mozilla
  * Public License, v. 2.0 that can be found in the LICENSE file.
  */
 
@@ -51,7 +51,7 @@ export class VideoClip extends VisualMixin(MediaClip<VideoClipProps>) {
 		this.element.controls = false;
 		this.element.playsInline = true;
 		this.element.style.display = 'hidden';
-		this.element.crossOrigin = "anonymous";
+		this.element.crossOrigin = 'anonymous';
 
 		(this.textrues.html5.source as any).autoPlay = false;
 		(this.textrues.html5.source as any).loop = false;
@@ -87,7 +87,7 @@ export class VideoClip extends VisualMixin(MediaClip<VideoClipProps>) {
 
 				this.state = 'READY';
 				resolve();
-			}
+			};
 
 			this.element.onerror = () => {
 				this.state = 'ERROR';
@@ -98,7 +98,7 @@ export class VideoClip extends VisualMixin(MediaClip<VideoClipProps>) {
 				});
 
 				reject(this.element.error ?? error);
-			}
+			};
 		});
 	}
 
@@ -132,7 +132,7 @@ export class VideoClip extends VisualMixin(MediaClip<VideoClipProps>) {
 	public exit(): void {
 		if (this.playing) {
 			this.element.pause();
-		};
+		}
 		if (this.filters && this.view.filters) {
 			this.view.filters = null as any;
 		}
 
@@ -7,6 +7,8 @@
 
 import { describe, it, vi, beforeEach, expect } from 'vitest';
 import { AudioSource } from './audio'; // Import the AudioSource class
+import { findSilences } from './audio.utils';
+import { Timestamp } from '../models';
 
 // Mocking the OfflineAudioContext class
 class MockOfflineAudioContext {
@@ -25,6 +27,30 @@ class MockOfflineAudioContext {
 
 vi.stubGlobal('OfflineAudioContext', MockOfflineAudioContext); // Stub the global OfflineAudioContext
 
+describe('AudioUtils', () => { // findSilences(samples, threshold, minDuration, duration in ms) - same argument order AudioSource.silences uses
+	it('all silent', () => {
+		const silences = findSilences(new Float32Array(100).fill(0), -50, 100, 100); // 100 all-zero samples
+		expect(silences).toEqual([{
+			start: new Timestamp(0),
+			stop: new Timestamp(100),
+		}]);
+	});
+
+	it('no silences', () => {
+		const silences = findSilences(new Float32Array(100).fill(1), -50, 100, 100); // every sample is 1, so nothing should be reported
+		expect(silences).toEqual([]);
+	});
+
+	it('find silences correctly', () => {
+		const samples = Array.from({ length: 500 }, (_, index) => index > 300 ? (index < 400 ? 0 : 1) : -1); // indices 0-300 are -1, 301-399 are 0, 400-499 are 1
+		const silences = findSilences(new Float32Array(samples), -50, 100, 5000); // 500 samples spread over 5000 ms, i.e. 10 ms per sample
+		expect(silences).toEqual([{
+			start: new Timestamp(3010),
+			stop: new Timestamp(4000),
+		}]);
+	});
+});
+
 describe('AudioSource', () => {
 	let audioSource: AudioSource;
 
@@ -33,6 +59,42 @@ describe('AudioSource', () => {
 		audioSource.file = new File([], 'audio.mp3', { type: 'audio/mp3' });
 	});
 
+	it('find silences correctly', async () => {
+		const audioBuffer = {
+			duration: 16,
+			sampleRate: 1000,
+			length: 16000,
+			getChannelData: () => new Float32Array(16000).fill(0), // all-zero channel data, so the whole buffer is silent
+		} as any as AudioBuffer;
+		audioSource.audioBuffer = audioBuffer; // preset buffer, so silences() does not call decode()
+		const silences = await audioSource.silences({}); // empty options: all defaults apply
+		expect(silences).toEqual([{
+			start: new Timestamp(0),
+			stop: new Timestamp(16000),
+		}]);
+	});
+
+	it('find silences correctly with too high minDuration', async () => {
+		const audioBuffer = {
+			duration: 16,
+			sampleRate: 1000,
+			length: 16000,
+			getChannelData: () => new Float32Array(16000).fill(0), // all-zero channel data, so the whole buffer is silent
+		} as any as AudioBuffer;
+		audioSource.audioBuffer = audioBuffer; // preset buffer, so silences() does not call decode()
+		const silences = await audioSource.silences({minDuration: 1e10}); // minDuration far exceeds the 16000 ms expected below
+		expect(silences).toEqual([{ // NOTE(review): the full range is still expected despite minDuration - confirm against findSilences
+			start: new Timestamp(0),
+			stop: new Timestamp(16000),
+		}]);
+	});
+
+	it('find silences correctly after caching', async () => {
+		const silences = await audioSource.silences({}); // first call computes and caches the result
+		const cachedSilences = await audioSource.silences({threshold: 0, minDuration: 1e10, windowSize: 1e10}); // different options, but the cached result is returned
+		expect(silences).toEqual(cachedSilences);
+	});
+
 	it('should decode an audio buffer correctly', async () => {
 		const buffer = await audioSource.decode(2, 44100, true);
 		expect(buffer.duration).toBe(5); // Mock duration
 
@@ -1,20 +1,24 @@
 /**
  * Copyright (c) 2024 The Diffusion Studio Authors
  *
- * This Source Code Form is subject to the terms of the Mozilla 
+ * This Source Code Form is subject to the terms of the Mozilla
  * Public License, v. 2.0 that can be found in the LICENSE file.
  */
 
 import { Source } from './source';
 
 import type { ClipType } from '../clips';
 import type { ArgumentTypes } from '../types';
-import type { FastSamplerOptions } from './audio.types';
-import type { Transcript } from '../models';
+import type { FastSamplerOptions, SilenceOptions } from './audio.types';
+import type { Timestamp, Transcript } from '../models';
+import { findSilences } from './audio.utils';
+
+const DEFAULT_SAMPLE_RATE = 3000;
 
 export class AudioSource<T extends Object = {}> extends Source<T> {
 	public readonly type: ClipType = 'audio';
 	private decoding = false;
+	private _silences?: { start: Timestamp; stop: Timestamp }[];
 
 	public transcript?: Transcript;
 	public audioBuffer?: AudioBuffer;
@@ -76,17 +80,21 @@ export class AudioSource<T extends Object = {}> extends Source<T> {
 	 * @param options - Sampling options.
 	 * @returns An array of the max values of the samples in the window.
 	 */
-	public async fastsampler({ length = 60, start = 0, stop, logarithmic = false }: FastSamplerOptions): Promise<Float32Array> {
+	public async fastsampler({
+		length = 60,
+		start = 0,
+		stop,
+		logarithmic = false,
+	}: FastSamplerOptions = {}): Promise<Float32Array> {
 		if (typeof start === 'object') start = start.millis;
 		if (typeof stop === 'object') stop = stop.millis;
 
-		const sampleRate = 3000;
-		const audioBuffer = this.audioBuffer ?? (await this.decode(1, sampleRate, true));
+		const audioBuffer = this.audioBuffer ?? (await this.decode(1, DEFAULT_SAMPLE_RATE, true));
 		const channelData = audioBuffer.getChannelData(0);
 
-		const firstSample = Math.floor(Math.max(start * sampleRate / 1000, 0));
+		const firstSample = Math.floor(Math.max((start * DEFAULT_SAMPLE_RATE) / 1000, 0));
 		const lastSample = stop
-			? Math.floor(Math.min(stop * sampleRate / 1000, audioBuffer.length))
+			? Math.floor(Math.min((stop * DEFAULT_SAMPLE_RATE) / 1000, audioBuffer.length))
 			: audioBuffer.length;
 
 		const windowSize = Math.floor((lastSample - firstSample) / length);
@@ -121,4 +129,28 @@ export class AudioSource<T extends Object = {}> extends Source<T> {
 		}
 		return div;
 	}
+
+	/**
+	 * Finds the silent ranges of this source. The first result is cached and returned by every later call, whatever options are passed.
+	 *
+	 * Decodes at `DEFAULT_SAMPLE_RATE` (3000) when no `audioBuffer` is set; `windowSize` is the number of buffer samples summarised per analysis window.
+	 * @param options - Silence detection options; they only take effect on the first (uncached) call.
+	 * @returns The detected silences as `{ start, stop }` pairs of `Timestamp`.
+	 */
+	public async silences({
+		threshold = -50,
+		minDuration = 100,
+		windowSize = 50,
+	}: SilenceOptions = {}): Promise<{ start: Timestamp; stop: Timestamp }[]> {
+		if (this._silences) return this._silences;
+
+		const audioBuffer = this.audioBuffer ?? (await this.decode(1, DEFAULT_SAMPLE_RATE, true));
+		const length = Math.floor(audioBuffer.length / windowSize);
+		const samples = await this.fastsampler({ length, logarithmic: false });
+
+		const silences = findSilences(samples, threshold, minDuration, this.duration.millis);
+		this._silences = silences;
+
+		return silences;
+	}
 }
@@ -4,20 +4,35 @@ import type { Timestamp } from '../models';
  * Fast sampler options.
  */
 export type FastSamplerOptions = {
-  /**
-   * The number of samples to return.
-   */
-  length?: number;
-  /**
-   * The start time in **milliseconds** relative to the beginning of the clip.
-   */
-  start?: Timestamp | number;
-  /**
-   * The stop time in **milliseconds** relative to the beginning of the clip.
-   */
-  stop?: Timestamp | number;
-  /**
-   * Whether to use a logarithmic scale.
-   */
-  logarithmic?: boolean;
+	/**
+	 * The number of samples to return.
+	 */
+	length?: number;
+	/**
+	 * The start time in **milliseconds** relative to the beginning of the clip.
+	 */
+	start?: Timestamp | number;
+	/**
+	 * The stop time in **milliseconds** relative to the beginning of the clip.
+	 */
+	stop?: Timestamp | number;
+	/**
+	 * Whether to use a logarithmic scale.
+	 */
+	logarithmic?: boolean;
+};
+
+export type SilenceOptions = {
+	/**
+	 * The threshold in dB used for the silence detection (`AudioSource.silences` defaults to -50).
+	 */
+	threshold?: number;
+	/**
+	 * The minimum duration in **milliseconds** for a silence to be reported (`AudioSource.silences` defaults to 100).
+	 */
+	minDuration?: number;
+	/**
+	 * The number of decoded audio samples grouped into one detection window (`AudioSource.silences` defaults to 50).
+	 */
+	windowSize?: number;
 };