Skip to content

Commit dd4ba04

Browse files
committed
added VAD to 'SepiaVoiceRecorder'
1 parent a3861f7 commit dd4ba04

File tree

1 file changed

+60
-2
lines changed

1 file changed

+60
-2
lines changed

src/sepia-recorder.js

Lines changed: 60 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,31 @@
6464
}
6565
}
6666
var waveEncoderIsBuffering = false;
67+
68+
//Voice-Activity-Detection events
69+
/**
 * Default callback for voice-activity-detection state changes.
 * It only logs the event; since it is a plain property of 'SepiaVoiceRecorder'
 * it can be reassigned to react to the events.
 *
 * @param {string} state - event name handed over by the caller (e.g. "vaup", "speechstart", "vadown", "speechend")
 * @param {number} code - numeric sequence code belonging to the event
 */
SepiaVoiceRecorder.onVadStateChange = function(state, code){
	var logTag = "SepiaVoiceRecorder - onVadStateChange";
	console.log(logTag, state, code);
};
72+
/**
 * Handles a data message of the VAD module and forwards sequence events
 * to 'SepiaVoiceRecorder.onVadStateChange(state, code)'.
 *
 * @param {Object} data - message payload; only 'vadSequenceCode' triggers an action,
 *		'voiceActivity' and 'voiceEnergy' are checked but not used (yet)
 */
function onVadData(data){
	//console.log("onVadData", data); 		//DEBUG

	//placeholders - these values are currently not processed
	if (data.voiceActivity != undefined){}
	if (data.voiceEnergy != undefined){}

	var sequenceCode = data.vadSequenceCode;
	if (sequenceCode == undefined){
		//no sequence event in this message (covers null as well)
		return;
	}
	//console.log("VAD sequence event: " + data.vadSequenceMsg); 	//DEBUG

	//known sequence codes and the state name reported for each of them
	var sequenceEvents = [
		[1, "vaup"],			//1: voice activity registered
		[2, "speechstart"],		//2: sequence started (continuous speech)
		[3, "vadown"],			//3: voice activity gone
		[4, "speechend"],		//4: speech finished max. time
		[5, "speechend"]		//5: speech finished (sequence end) - see: data.vadSequenceStarted, data.vadSequenceEnded
	];
	for (var i = 0; i < sequenceEvents.length; i++){
		var eventCode = sequenceEvents[i][0];
		//loose comparison on purpose: codes given as e.g. numeric strings match as well
		if (sequenceCode == eventCode){
			SepiaVoiceRecorder.onVadStateChange(sequenceEvents[i][1], eventCode);
			return;
		}
	}
}
6792

6893
//SpeechRecognition events
6994
SepiaVoiceRecorder.onSpeechRecognitionStateChange = function(ev){
@@ -119,7 +144,9 @@
119144
if (options.resamplerBufferSize) resamplerBufferSize = options.resamplerBufferSize;
120145
}
121146
var useRecognitionModule = !!options.asr;
122-
if (!options.asr) options.asr = {};
147+
if (typeof options.asr != "object") options.asr = {};
148+
var useVadModule = !!options.vad;
149+
if (typeof options.vad != "object") options.vad = {};
123150
//audio source
124151
var customSource = undefined;
125152
if (options.fileUrl){
@@ -172,6 +199,29 @@
172199
}
173200
};
174201
var waveEncoderIndex;
202+
203+
//Default buffer size of the VAD module.
//480 is the 30ms window for WebRTC VAD 16k - it's a bit "special"
var defaultVadBuffer = 480*2;

//Module definition of the voice-activity-detection worker.
//Messages of the module are handled by 'onVadData'; the values of 'options.vad'
//overwrite the defaults where given.
var vadWorker = {
	name: 'webrtc-vad-worker', 	//More experimental version: 'sepia-vad-worker'
	type: 'worker',
	settings: {
		onmessage: onVadData,
		options: {
			setup: {
				inputSampleRate: targetSampleRate,
				inputSampleSize: resamplerBufferSize,
				bufferSize: options.vad.bufferSize || defaultVadBuffer, 	//restrictions apply ^^
				//NOTE: 0 is a valid WebRTC VAD mode (least aggressive), so only fall back to
				//the default if the mode is really not set - 'mode || 3' would turn 0 into 3
				vadMode: (options.vad.mode != undefined)? options.vad.mode : 3,
				sequence: {
					silenceActivationTime: 450, //250,
					maxSequenceTime: options.vad.maxSequenceTime || 10000,
					minSequenceTime: options.vad.minSequenceTime || 600
				}
			}
		}
	}
};
224+
var vadWorkerIndex;
175225

176226
var sttServerModule = {
177227
name: 'stt-socket',
@@ -213,7 +263,15 @@
213263

214264
//- resampler is required
215265
activeModules.push(resampler);
216-
resamplerIndex = activeModules.length;
266+
resamplerIndex = activeModules.length;
267+
268+
//- use VAD?
269+
if (useVadModule){
270+
activeModules.push(vadWorker);
271+
vadWorkerIndex = activeModules.length;
272+
SepiaVoiceRecorder.vadModule = vadWorker;
273+
resampler.settings.sendToModules.push(vadWorkerIndex); //add to resampler
274+
}
217275

218276
//- use either speech-recognition (ASR) or wave-encoder
219277
if (useRecognitionModule){

0 commit comments

Comments
 (0)