Skip to content

Commit c99cd44

Browse files
committed
added find silences to audio source
1 parent ad6cdc4 commit c99cd44

File tree

5 files changed

+169
-28
lines changed

5 files changed

+169
-28
lines changed

playground/main.ts

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -34,10 +34,15 @@ const image = await composition.add(
3434

3535
const audioTrack = new core.AudioTrack();
3636

37-
const audioTest = await new core.AudioClip(await core.AudioSource
38-
.from('/silences.mp3'), {
39-
volume: 0.1,
40-
});
37+
const audioSource = await core.AudioSource.from('/silences.mp3');
38+
39+
const silences = await audioSource.silences({});
40+
console.log(silences);
41+
42+
const audioTest = await new core.AudioClip(audioSource, {
43+
volume: 0.1,
44+
});
45+
console.log("duration", audioTest.duration.millis);
4146

4247
await audioTrack.add(audioTest);
4348

src/sources/audio.spec.ts

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@
77

88
import { describe, it, vi, beforeEach, expect } from 'vitest';
99
import { AudioSource } from './audio'; // Import the AudioSource class
10+
import { findSilences } from './audio.utils';
11+
import { Timestamp } from '../models';
1012

1113
// Mocking the OfflineAudioContext class
1214
class MockOfflineAudioContext {
@@ -25,6 +27,33 @@ class MockOfflineAudioContext {
2527

2628
vi.stubGlobal('OfflineAudioContext', MockOfflineAudioContext); // Stub the global OfflineAudioContext
2729

30+
describe('AudioUtils', () => {
  // Amplitude 0 is clamped to 1e-10, i.e. -200 dB, which is far below the
  // -50 dB threshold: the whole clip is one silence.
  it('all silent', () => {
    const silences = findSilences(new Float32Array(100).fill(0), -50, 100, 100);
    expect(silences).toEqual([
      {
        start: new Timestamp(0),
        stop: new Timestamp(100),
      },
    ]);
  });

  // Full-scale samples sit at 0 dB, well above the threshold: nothing is silent.
  it('no silences', () => {
    const silences = findSilences(new Float32Array(100).fill(1), -50, 100, 100);
    expect(silences).toEqual([]);
  });

  it('find silences correctly', () => {
    // 500 samples over 5000 ms => 10 ms per sample.
    // Indices 0..300 are loud (-1), 301..399 are silent (0), 400..499 are loud (1).
    const samples = Array.from({ length: 500 }, (_, index) =>
      index > 300 ? (index < 400 ? 0 : 1) : -1,
    );
    const silences = findSilences(new Float32Array(samples), -50, 100, 5000);
    expect(silences).toEqual([
      {
        start: new Timestamp(3010),
        stop: new Timestamp(4000),
      },
    ]);
  });

  it('keeps a trailing silence that lasts at least minDuration milliseconds', () => {
    // 100 samples over 10000 ms => 100 ms per sample; the last 10 samples are 1000 ms.
    const samples = new Float32Array(100).fill(1).fill(0, 90);
    const silences = findSilences(samples, -50, 100, 10000);
    expect(silences).toEqual([
      {
        start: new Timestamp(9000),
        stop: new Timestamp(10000),
      },
    ]);
  });

  it('drops a trailing silence shorter than minDuration milliseconds', () => {
    // 1000 samples over 100 ms => 0.1 ms per sample; the last 200 samples are only 20 ms.
    const samples = new Float32Array(1000).fill(1).fill(0, 800);
    const silences = findSilences(samples, -50, 100, 100);
    expect(silences).toEqual([]);
  });

  it('returns nothing for an empty sample array', () => {
    expect(findSilences(new Float32Array(0), -50, 100, 100)).toEqual([]);
  });
});
56+
2857
describe('AudioSource', () => {
2958
let audioSource: AudioSource;
3059

@@ -33,6 +62,14 @@ describe('AudioSource', () => {
3362
audioSource.file = new File([], 'audio.mp3', { type: 'audio/mp3' });
3463
});
3564

65+
it('find silences correctly', async () => {
66+
const silences = await audioSource.silences({});
67+
expect(silences).toEqual([{
68+
start: new Timestamp(0),
69+
stop: new Timestamp(5000),
70+
}]);
71+
});
72+
3673
it('should decode an audio buffer correctly', async () => {
3774
const buffer = await audioSource.decode(2, 44100, true);
3875
expect(buffer.duration).toBe(5); // Mock duration

src/sources/audio.ts

Lines changed: 36 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,19 @@
11
/**
22
* Copyright (c) 2024 The Diffusion Studio Authors
33
*
4-
* This Source Code Form is subject to the terms of the Mozilla
4+
* This Source Code Form is subject to the terms of the Mozilla
55
* Public License, v. 2.0 that can be found in the LICENSE file.
66
*/
77

88
import { Source } from './source';
99

1010
import type { ClipType } from '../clips';
1111
import type { ArgumentTypes } from '../types';
12-
import type { FastSamplerOptions } from './audio.types';
13-
import type { Transcript } from '../models';
12+
import type { FastSamplerOptions, SilenceOptions } from './audio.types';
13+
import type { Timestamp, Transcript } from '../models';
14+
import { findSilences } from './audio.utils';
15+
16+
const DEFAULT_SAMPLE_RATE = 3000;
1417

1518
export class AudioSource<T extends Object = {}> extends Source<T> {
1619
public readonly type: ClipType = 'audio';
@@ -76,17 +79,21 @@ export class AudioSource<T extends Object = {}> extends Source<T> {
7679
* @param options - Sampling options.
7780
* @returns An array of the max values of the samples in the window.
7881
*/
79-
public async fastsampler({ length = 60, start = 0, stop, logarithmic = false }: FastSamplerOptions): Promise<Float32Array> {
82+
public async fastsampler({
83+
length = 60,
84+
start = 0,
85+
stop,
86+
logarithmic = false,
87+
}: FastSamplerOptions): Promise<Float32Array> {
8088
if (typeof start === 'object') start = start.millis;
8189
if (typeof stop === 'object') stop = stop.millis;
8290

83-
const sampleRate = 3000;
84-
const audioBuffer = this.audioBuffer ?? (await this.decode(1, sampleRate, true));
91+
const audioBuffer = this.audioBuffer ?? (await this.decode(1, DEFAULT_SAMPLE_RATE, true));
8592
const channelData = audioBuffer.getChannelData(0);
8693

87-
const firstSample = Math.floor(Math.max(start * sampleRate / 1000, 0));
94+
const firstSample = Math.floor(Math.max((start * DEFAULT_SAMPLE_RATE) / 1000, 0));
8895
const lastSample = stop
89-
? Math.floor(Math.min(stop * sampleRate / 1000, audioBuffer.length))
96+
? Math.floor(Math.min((stop * DEFAULT_SAMPLE_RATE) / 1000, audioBuffer.length))
9097
: audioBuffer.length;
9198

9299
const windowSize = Math.floor((lastSample - firstSample) / length);
@@ -121,4 +128,25 @@ export class AudioSource<T extends Object = {}> extends Source<T> {
121128
}
122129
return div;
123130
}
131+
132+
/**
133+
* Find silences in the audio clip
134+
*
135+
* uses default sample rate of 3000
136+
* @param options - Silences options.
137+
* @returns An array of the silences (in ms) in the clip.
138+
*/
139+
public async silences({
140+
threshold = -50,
141+
minDuration = 5,
142+
windowSize = 50,
143+
}: SilenceOptions): Promise<{ start: Timestamp; stop: Timestamp }[]> {
144+
const audioBuffer = this.audioBuffer ?? (await this.decode(1, DEFAULT_SAMPLE_RATE, true));
145+
const length = Math.floor(audioBuffer.length / windowSize);
146+
const samples = await this.fastsampler({ length, logarithmic: false });
147+
148+
const silences = findSilences(samples, threshold, minDuration, this.duration.millis);
149+
150+
return silences;
151+
}
124152
}

src/sources/audio.types.ts

Lines changed: 31 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -4,20 +4,35 @@ import type { Timestamp } from '../models';
44
* Fast sampler options.
55
*/
66
export type FastSamplerOptions = {
7-
/**
8-
* The number of samples to return.
9-
*/
10-
length?: number;
11-
/**
12-
* The start time in **milliseconds** relative to the beginning of the clip.
13-
*/
14-
start?: Timestamp | number;
15-
/**
16-
* The stop time in **milliseconds** relative to the beginning of the clip.
17-
*/
18-
stop?: Timestamp | number;
19-
/**
20-
* Whether to use a logarithmic scale.
21-
*/
22-
logarithmic?: boolean;
7+
/**
8+
* The number of samples to return.
9+
*/
10+
length?: number;
11+
/**
12+
* The start time in **milliseconds** relative to the beginning of the clip.
13+
*/
14+
start?: Timestamp | number;
15+
/**
16+
* The stop time in **milliseconds** relative to the beginning of the clip.
17+
*/
18+
stop?: Timestamp | number;
19+
/**
20+
* Whether to use a logarithmic scale.
21+
*/
22+
logarithmic?: boolean;
23+
};
24+
25+
/**
 * Options accepted by `AudioSource.silences`. Every field is optional; the
 * method supplies the defaults noted below.
 */
export type SilenceOptions = {
  /**
   * The threshold to use for the silence detection in db. Sample levels are
   * computed as `20 * log10(|sample|)` before being compared against it.
   * Defaults to `-50`.
   */
  threshold?: number;
  /**
   * The minimum duration of a silence to be considered a silence in milliseconds.
   * Defaults to `5`.
   */
  minDuration?: number;
  /**
   * The window size to use for the silence detection: the number of decoded
   * samples that are summarized into one analysis value (the detector
   * requests `floor(audioBuffer.length / windowSize)` values). Defaults to `50`.
   */
  windowSize?: number;
};

src/sources/audio.utils.ts

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
import { Timestamp } from '../models';
2+
3+
/**
4+
* Find the silences in an audio clip.
5+
* @param samples - The sub-sampled samples of the audio clip.
6+
* @param threshold - The threshold to use for the silence detection in db.
7+
* @param minDuration - The minimum duration of a silence to be considered a silence in milliseconds.
8+
* @param duration - The length of the audio clip in milliseconds.
9+
* @returns An array of the silences in the clip.
10+
*/
11+
export function findSilences(
12+
samples: Float32Array,
13+
threshold: number,
14+
minDuration: number,
15+
duration: number,
16+
): { start: Timestamp; stop: Timestamp }[] {
17+
const decibelValues = samples.map((sample) => 20 * Math.log10(Math.max(Math.abs(sample), 1e-10)));
18+
const silences: { start: Timestamp; stop: Timestamp }[] = [];
19+
20+
// Find silence periods in this clip
21+
let silenceStart: number | null = null;
22+
23+
for (let i = 0; i < decibelValues.length; i++) {
24+
if (decibelValues[i] > threshold) {
25+
if (silenceStart === null) {
26+
silenceStart = i;
27+
}
28+
} else if (silenceStart !== null) {
29+
const silenceDuration = ((i - silenceStart) * duration) / decibelValues.length;
30+
if (silenceDuration >= minDuration) {
31+
// Convert chunk indices to seconds and adjust for clip offset
32+
const silenceStartFrame = Math.round((silenceStart * duration) / decibelValues.length);
33+
const silenceStopFrame = Math.round((i * duration) / decibelValues.length);
34+
35+
silences.push({
36+
start: new Timestamp(silenceStartFrame),
37+
stop: new Timestamp(silenceStopFrame),
38+
});
39+
}
40+
silenceStart = null;
41+
}
42+
}
43+
44+
// Handle silence at end of clip
45+
if (silenceStart !== null) {
46+
const silenceDuration = decibelValues.length - silenceStart;
47+
if (silenceDuration >= minDuration || silenceDuration == decibelValues.length) {
48+
silences.push({
49+
start: new Timestamp(Math.round((silenceStart * duration) / decibelValues.length)),
50+
stop: new Timestamp(duration),
51+
});
52+
}
53+
}
54+
55+
return silences;
56+
}

0 commit comments

Comments
 (0)