Skip to content

Commit ef61b88

Browse files
committed
added silence detection
1 parent 0e00b19 commit ef61b88

File tree

5 files changed

+244
-82
lines changed

5 files changed

+244
-82
lines changed

playground/main.ts

Lines changed: 4 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -32,23 +32,14 @@ const image = await composition.add(
3232
})
3333
);
3434

35-
const track = composition.findTracks(core.VideoTrack).at(0);
35+
const audioTrack = new core.AudioTrack();
3636

37-
console.log(await track?.detectSilences());
38-
39-
const track2 = new core.VideoTrack();
40-
41-
const video_tutorial = await new core.VideoClip(await core.VideoSource
42-
.from('/tutorial.mp4'), {
37+
const audioTest = await new core.AudioClip(await core.AudioSource
38+
.from('/silences.mp3'), {
4339
volume: 0.1,
44-
anchor: 0.5,
45-
position: 'center',
46-
height: '100%',
4740
});
4841

49-
await track2.add(video_tutorial);
50-
51-
console.log("video_tutorial", await track2.detectSilences());
42+
await audioTrack.add(audioTest);
5243

5344
image.animate()
5445
.rotation(-16).to(14, 5).to(-7, 10).to(24, 7).to(-3, 9).to(19, 7).to(-14, 12).to(5, 9).to(-30, 13)

public/silences.mp3

43.1 KB
Binary file not shown.

src/tracks/media/media.spec.ts

Lines changed: 141 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,147 @@ import { Composition } from '../../composition';
1010
import { MediaClip } from '../../clips';
1111
import { Timestamp } from '../../models';
1212
import { MediaTrack } from './media';
13+
import { AudioSource } from '../../sources';
14+
import { getSilenceArrayBuffer } from './media.utils';
15+
16+
17+
// Mocking the OfflineAudioContext class for silence detection
18+
class MockSilenceAudioContext {
19+
constructor(public numberOfChannels: number, public length: number, public sampleRate: number) { }
20+
21+
decodeAudioData(_: ArrayBuffer): Promise<AudioBuffer> {
22+
const audioBuffer = {
23+
duration: 5, // Mock duration
24+
sampleRate: 1000,
25+
length: 5000,
26+
getChannelData: () => new Float32Array(5000).fill(0.5), // Return a dummy Float32Array
27+
} as any as AudioBuffer;
28+
return Promise.resolve(audioBuffer);
29+
}
30+
31+
close() {
32+
return Promise.resolve();
33+
}
34+
}
35+
36+
vi.stubGlobal('AudioContext', MockSilenceAudioContext); // Stub the global AudioContext used by silence detection
37+
38+
describe('Get silence array buffer', () => {
39+
it('should get silence array buffer', async () => {
40+
const audioBuffer = {
41+
duration: 5, // Mock duration
42+
sampleRate: 44100,
43+
length: 5 * 44100,
44+
getChannelData: () => {
45+
const totalLength = 5 * 44100;
46+
return Float32Array.from({length: totalLength}, (_, i) => {
47+
if (i < 2 * 44100) {
48+
return 1;
49+
}
50+
else if (i >= 3 * 44100) {
51+
return -1;
52+
}
53+
return 0;
54+
});
55+
},
56+
} as any as AudioBuffer;
57+
const silences = getSilenceArrayBuffer(audioBuffer, 44100, 1, -50, 0);
58+
expect(silences).toEqual([{
59+
start: 0,
60+
stop: 2
61+
},
62+
{
63+
start: 3,
64+
stop: 5
65+
}]);
66+
});
67+
68+
it('no silence in getSilenceArrayBuffer', () => {
69+
const audioBuffer = {
70+
duration: 5, // Mock duration
71+
sampleRate: 44100,
72+
length: 5 * 44100,
73+
getChannelData: (i: number) => new Float32Array(5 * 44100).fill(0),
74+
} as any as AudioBuffer;
75+
const silences = getSilenceArrayBuffer(audioBuffer, 1024, 1, -50, 0);
76+
expect(silences).toEqual([]);
77+
});
78+
79+
it('only silence in getSilenceArrayBuffer', () => {
80+
const audioBuffer = {
81+
duration: 5, // Mock duration
82+
sampleRate: 44100,
83+
length: 5 * 44100,
84+
getChannelData: () => new Float32Array(5 * 44100).fill(1),
85+
} as any as AudioBuffer;
86+
const silences = getSilenceArrayBuffer(audioBuffer, 1024, 1, -50, 0);
87+
expect(silences).toEqual([{
88+
start: 0,
89+
stop: 5
90+
}]);
91+
});
92+
93+
it('should throw error if no sample rate', () => {
94+
const audioBuffer = {
95+
sampleRate: undefined,
96+
} as any as AudioBuffer;
97+
expect(() => getSilenceArrayBuffer(audioBuffer, 1024, 1, -50, 0)).toThrow();
98+
});
99+
});
100+
101+
102+
describe('Find silences in a track', () => {
103+
let comp: Composition;
104+
let track: MediaTrack<MediaClip>;
105+
let file: File;
106+
const updateMock = vi.fn();
107+
108+
beforeEach(() => {
109+
// frame and seconds are the same
110+
comp = new Composition();
111+
file = new File([], "test.mp3");
112+
track = comp.shiftTrack(new MediaTrack<MediaClip>());
113+
track.on('update', updateMock);
114+
});
115+
116+
it('empty track should have no silences', async () => {
117+
const emptyTrack = new MediaTrack();
118+
const silences = await track.detectSilences();
119+
expect(silences).toEqual([]);
120+
});
121+
122+
it('track with clip but no element should have no silences', async () => {
123+
const clip = new MediaClip();
124+
clip.source = await AudioSource.from(file);
125+
await track.add(clip);
126+
const silences = await track.detectSilences();
127+
expect(silences).toEqual([]);
128+
});
129+
130+
it('track with clip and element should find silences', async () => {
131+
const clip = new MediaClip();
132+
clip.source = await AudioSource.from(file);
133+
clip.element = new Audio();
134+
clip.duration.seconds = 5;
135+
const clip2 = new MediaClip();
136+
clip2.source = await AudioSource.from(file);
137+
clip2.element = new Audio();
138+
clip2.duration.seconds = 5;
139+
clip2.offset.seconds = 5;
140+
await track.add(clip);
141+
await track.add(clip2);
142+
143+
const silences = await track.detectSilences();
144+
expect(silences).toEqual([{
145+
start: 0,
146+
stop: 5
147+
},
148+
{
149+
start: 5.001,
150+
stop: 10.001000000000001
151+
}]);
152+
});
153+
});
13154

14155
describe('The Media Track Object', () => {
15156
let comp: Composition;

src/tracks/media/media.ts

Lines changed: 8 additions & 69 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ import { Track } from '../track';
99

1010
import type { MediaClip } from '../../clips';
1111
import type { Timestamp } from '../../models';
12+
import { getSilenceArrayBuffer } from './media.utils';
1213

1314
export class MediaTrack<Clip extends MediaClip> extends Track<MediaClip> {
1415
public clips: Clip[] = [];
@@ -20,6 +21,9 @@ export class MediaTrack<Clip extends MediaClip> extends Track<MediaClip> {
2021

2122
/**
2223
* Detect periods of silence across all clips in the track
24+
*
25+
* This currently only searches for silences in each clip individually
26+
*
2327
* @param subSample Number of samples to skip when analyzing audio (higher = faster but less accurate)
2428
* @param silenceThreshold Volume threshold in dB below which is considered silence
2529
* @param minSilenceDuration Minimum duration in seconds for a silence period to be included
@@ -41,79 +45,14 @@ export class MediaTrack<Clip extends MediaClip> extends Track<MediaClip> {
4145

4246
// Get audio data for this clip
4347
const arrayBuffer = await (await clip.source.getFile()).arrayBuffer();
44-
const audioBuffer = await audioContext.decodeAudioData(arrayBuffer);
45-
const channelData = audioBuffer.getChannelData(0);
48+
const audioBuffer = await audioContext.decodeAudioData(arrayBuffer);
4649

47-
// Process samples in chunks
48-
const sampleSize = subSample;
49-
const numChunks = Math.floor(channelData.length / sampleSize);
50-
const minSilenceChunks = Math.floor(minSilenceDuration * audioBuffer.sampleRate / subSample);
50+
silences.push(...getSilenceArrayBuffer(audioBuffer, subSample, minSilenceDuration, silenceThreshold, clip.start.seconds));
5151

52-
const decibelValues: number[] = [];
53-
54-
for (let i = 0; i < numChunks; i++) {
55-
const chunk = channelData.slice(i * sampleSize, (i + 1) * sampleSize);
56-
const rms = Math.sqrt(chunk.reduce((sum, val) => sum + val * val, 0) / chunk.length);
57-
const db = 20 * Math.log10(Math.max(rms, 1e-10));
58-
decibelValues.push(db);
59-
}
60-
61-
// Find silence periods in this clip
62-
let silenceStart: number | null = null;
63-
64-
for (let i = 0; i < decibelValues.length; i++) {
65-
if (decibelValues[i] < silenceThreshold) {
66-
if (silenceStart === null) {
67-
silenceStart = i;
68-
}
69-
} else if (silenceStart !== null) {
70-
const silenceDuration = i - silenceStart;
71-
if (silenceDuration >= minSilenceChunks) {
72-
// Convert chunk indices to seconds and adjust for clip offset
73-
const silenceStartTime = (silenceStart * sampleSize) / audioBuffer.sampleRate;
74-
const silenceStopTime = (i * sampleSize) / audioBuffer.sampleRate;
75-
76-
silences.push({
77-
start: silenceStartTime + clip.start.seconds,
78-
stop: silenceStopTime + clip.start.seconds
79-
});
80-
}
81-
silenceStart = null;
82-
}
83-
}
84-
85-
// Handle silence at end of clip
86-
if (silenceStart !== null) {
87-
const silenceDuration = decibelValues.length - silenceStart;
88-
if (silenceDuration >= minSilenceChunks || silenceDuration == decibelValues.length) {
89-
silences.push({
90-
start: (silenceStart * sampleSize) / audioBuffer.sampleRate + clip.start.seconds,
91-
stop: audioBuffer.duration + clip.start.seconds
92-
});
93-
}
94-
}
9552
}
9653

9754
await audioContext.close();
9855

99-
// Sort silences by start time and merge overlapping periods
100-
silences.sort((a, b) => a.start - b.start);
101-
const mergedSilences: typeof silences = [];
102-
103-
for (const silence of silences) {
104-
if (mergedSilences.length === 0) {
105-
mergedSilences.push(silence);
106-
continue;
107-
}
108-
109-
const lastSilence = mergedSilences[mergedSilences.length - 1];
110-
if (silence.start <= lastSilence.stop) {
111-
// Merge overlapping silence periods
112-
lastSilence.stop = Math.max(lastSilence.stop, silence.stop);
113-
} else {
114-
mergedSilences.push(silence);
115-
}
116-
}
117-
118-
return mergedSilences;
56+
return silences.sort((a, b) => a.start - b.start);
11957
}
58+
}

src/tracks/media/media.utils.ts

Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,91 @@
1+
/**
2+
* Get the array buffer for silence detection
3+
*
4+
* @param audioBuffer - The audio buffer to process
5+
* @param subSample - The number of samples to process at a time
6+
* @param minSilenceDuration - The minimum duration of silence to detect
7+
* @param silenceThreshold - The threshold for silence detection (in decibels < 0)
8+
* @param offsetSeconds - The offset in seconds to apply to the detected silences
9+
*/
10+
export function getSilenceArrayBuffer(
11+
audioBuffer: AudioBuffer,
12+
subSample: number,
13+
minSilenceDuration: number,
14+
silenceThreshold: number,
15+
offsetSeconds: number
16+
) {
17+
if (audioBuffer.sampleRate === undefined) {
18+
throw new Error("Audio buffer has no sample rate");
19+
}
20+
21+
const channelData = audioBuffer.getChannelData(0);
22+
console.log("max", channelData.reduce((max, val) => Math.max(max, val), 0));
23+
console.log("min", channelData.reduce((min, val) => Math.min(min, val), 0));
24+
25+
// Process samples in chunks
26+
const sampleSize = subSample;
27+
const numChunks = Math.floor(channelData.length / sampleSize);
28+
const minSilenceChunks = Math.floor(
29+
(minSilenceDuration * audioBuffer.sampleRate) / subSample
30+
);
31+
32+
const decibelValues: number[] = [];
33+
const silences: { start: number; stop: number }[] = [];
34+
35+
for (let i = 0; i < numChunks; i++) {
36+
const chunk = channelData.slice(i * sampleSize, (i + 1) * sampleSize);
37+
const rms = Math.sqrt(
38+
chunk.reduce((sum, val) => sum + val * val, 0) / chunk.length
39+
);
40+
const db = 20 * Math.log10(Math.max(rms, 1e-10));
41+
decibelValues.push(db);
42+
}
43+
44+
console.log("decibel", decibelValues);
45+
46+
// Find silence periods in this clip
47+
let silenceStart: number | null = null;
48+
49+
for (let i = 0; i < decibelValues.length; i++) {
50+
if (decibelValues[i] > silenceThreshold) {
51+
if (silenceStart === null) {
52+
silenceStart = i;
53+
}
54+
} else if (silenceStart !== null) {
55+
const silenceDuration = i - silenceStart;
56+
if (silenceDuration >= minSilenceChunks) {
57+
// Convert chunk indices to seconds and adjust for clip offset
58+
const silenceStartTime =
59+
(silenceStart * sampleSize) / audioBuffer.sampleRate;
60+
const silenceStopTime =
61+
(i * sampleSize) / audioBuffer.sampleRate;
62+
63+
silences.push({
64+
start: silenceStartTime + offsetSeconds,
65+
stop: silenceStopTime + offsetSeconds,
66+
});
67+
}
68+
silenceStart = null;
69+
}
70+
}
71+
72+
// Handle silence at end of clip
73+
if (silenceStart !== null) {
74+
const silenceDuration = decibelValues.length - silenceStart;
75+
if (
76+
silenceDuration >= minSilenceChunks ||
77+
silenceDuration == decibelValues.length
78+
) {
79+
silences.push({
80+
start:
81+
(silenceStart * sampleSize) / audioBuffer.sampleRate +
82+
offsetSeconds,
83+
stop:
84+
(channelData.length) / audioBuffer.sampleRate +
85+
offsetSeconds,
86+
});
87+
}
88+
}
89+
90+
return silences;
91+
}

0 commit comments

Comments
 (0)