|
1 | 1 | /** |
2 | 2 | * Copyright (c) 2024 The Diffusion Studio Authors |
3 | 3 | * |
4 | | - * This Source Code Form is subject to the terms of the Mozilla |
| 4 | + * This Source Code Form is subject to the terms of the Mozilla |
5 | 5 | * Public License, v. 2.0 that can be found in the LICENSE file. |
6 | 6 | */ |
7 | 7 |
|
8 | 8 | import { Source } from './source'; |
9 | 9 |
|
10 | 10 | import type { ClipType } from '../clips'; |
11 | 11 | import type { ArgumentTypes } from '../types'; |
12 | | -import type { FastSamplerOptions } from './audio.types'; |
13 | | -import type { Transcript } from '../models'; |
| 12 | +import type { FastSamplerOptions, SilenceOptions } from './audio.types'; |
| 13 | +import type { Timestamp, Transcript } from '../models'; |
| 14 | +import { findSilences } from './audio.utils'; |
| 15 | + |
// Sample rate handed to `decode` when no decoded audio buffer is cached on the source; `fastsampler` also uses it to convert millisecond offsets into sample indices. NOTE(review): presumably samples per second — confirm against `decode`.
| | 16 | +const DEFAULT_SAMPLE_RATE = 3000; |
14 | 17 |
|
15 | 18 | export class AudioSource<T extends Object = {}> extends Source<T> { |
16 | 19 | public readonly type: ClipType = 'audio'; |
@@ -76,17 +79,21 @@ export class AudioSource<T extends Object = {}> extends Source<T> { |
76 | 79 | * @param options - Sampling options. |
77 | 80 | * @returns An array of the max values of the samples in the window. |
78 | 81 | */ |
79 | | - public async fastsampler({ length = 60, start = 0, stop, logarithmic = false }: FastSamplerOptions): Promise<Float32Array> { |
| 82 | + public async fastsampler({ |
| 83 | + length = 60, |
| 84 | + start = 0, |
| 85 | + stop, |
| 86 | + logarithmic = false, |
| 87 | + }: FastSamplerOptions): Promise<Float32Array> { |
80 | 88 | if (typeof start === 'object') start = start.millis; |
81 | 89 | if (typeof stop === 'object') stop = stop.millis; |
82 | 90 |
|
83 | | - const sampleRate = 3000; |
84 | | - const audioBuffer = this.audioBuffer ?? (await this.decode(1, sampleRate, true)); |
| 91 | + const audioBuffer = this.audioBuffer ?? (await this.decode(1, DEFAULT_SAMPLE_RATE, true)); |
85 | 92 | const channelData = audioBuffer.getChannelData(0); |
86 | 93 |
|
87 | | - const firstSample = Math.floor(Math.max(start * sampleRate / 1000, 0)); |
| 94 | + const firstSample = Math.floor(Math.max((start * DEFAULT_SAMPLE_RATE) / 1000, 0)); |
88 | 95 | const lastSample = stop |
89 | | - ? Math.floor(Math.min(stop * sampleRate / 1000, audioBuffer.length)) |
| 96 | + ? Math.floor(Math.min((stop * DEFAULT_SAMPLE_RATE) / 1000, audioBuffer.length)) |
90 | 97 | : audioBuffer.length; |
91 | 98 |
|
92 | 99 | const windowSize = Math.floor((lastSample - firstSample) / length); |
@@ -121,4 +128,25 @@ export class AudioSource<T extends Object = {}> extends Source<T> { |
121 | 128 | } |
122 | 129 | return div; |
123 | 130 | } |
| 131 | + |
| 132 | + /** |
| 133 | + * Find silences in the audio clip |
| 134 | + * |
| 135 | + * uses default sample rate of 3000 |
| 136 | + * @param options - Silences options. |
| 137 | + * @returns An array of the silences (in ms) in the clip. |
| 138 | + */ |
| 139 | + public async silences({ |
| 140 | + threshold = -50, |
| 141 | + minDuration = 5, |
| 142 | + windowSize = 50, |
| 143 | + }: SilenceOptions): Promise<{ start: Timestamp; stop: Timestamp }[]> { |
| 144 | + const audioBuffer = this.audioBuffer ?? (await this.decode(1, DEFAULT_SAMPLE_RATE, true)); |
| 145 | + const length = Math.floor(audioBuffer.length / windowSize); |
| 146 | + const samples = await this.fastsampler({ length, logarithmic: false }); |
| 147 | + |
| 148 | + const silences = findSilences(samples, threshold, minDuration, this.duration.millis); |
| 149 | + |
| 150 | + return silences; |
| 151 | + } |
124 | 152 | } |
0 commit comments