|
64 | 64 | } |
65 | 65 | } |
66 | 66 | var waveEncoderIsBuffering = false; |
| 67 | + |
| 68 | + //Voice-Activity-Detection events |
| 69 | + SepiaVoiceRecorder.onVadStateChange = function(state, code){ |
| 70 | + console.log("SepiaVoiceRecorder - onVadStateChange", state, code); |
| 71 | + } |
| 72 | + function onVadData(data){ |
| 73 | + //console.log("onVadData", data); //DEBUG |
| 74 | + if (data.voiceActivity != undefined){}	//placeholder: voice-activity value not handled here yet |
| 75 | + if (data.voiceEnergy != undefined){}	//placeholder: voice-energy value not handled here yet |
| 76 | + if (data.vadSequenceCode != undefined){ |
| 77 | + //console.log("VAD sequence event: " + data.vadSequenceMsg); //DEBUG |
| 78 | + if (data.vadSequenceCode == 1){ |
| 79 | + SepiaVoiceRecorder.onVadStateChange("vaup", 1); //1: voice activity registered |
| 80 | + }else if (data.vadSequenceCode == 2){ |
| 81 | + SepiaVoiceRecorder.onVadStateChange("speechstart", 2); //2: sequence started (continuous speech) |
| 82 | + }else if (data.vadSequenceCode == 3){ |
| 83 | + SepiaVoiceRecorder.onVadStateChange("vadown", 3); //3: voice activity gone |
| 84 | + }else if (data.vadSequenceCode == 4){ |
| 85 | + SepiaVoiceRecorder.onVadStateChange("speechend", 4); //4: speech finished (max. sequence time reached) |
| 86 | + }else if (data.vadSequenceCode == 5){ |
| 87 | + SepiaVoiceRecorder.onVadStateChange("speechend", 5); //5: speech finished (sequence end) |
| 88 | + //data.vadSequenceStarted, data.vadSequenceEnded |
| 89 | + } |
| 90 | + } |
| 91 | + } |
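//A minimal usage sketch of the events above: overriding 'onVadStateChange' to react
//to the VAD states dispatched by 'onVadData'. States and codes are taken from this
//file; the log output is illustrative only, not part of the diff.
SepiaVoiceRecorder.onVadStateChange = function(state, code){
	if (state == "speechstart"){
		console.log("speech sequence started (code 2)");
	}else if (state == "speechend"){
		console.log("speech sequence finished (code " + code + ": 4 = max. time, 5 = sequence end)");
	}else if (state == "vaup" || state == "vadown"){
		console.log("short-term voice activity changed:", state, code);
	}
};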
67 | 92 |
|
68 | 93 | //SpeechRecognition events |
69 | 94 | SepiaVoiceRecorder.onSpeechRecognitionStateChange = function(ev){ |
|
119 | 144 | if (options.resamplerBufferSize) resamplerBufferSize = options.resamplerBufferSize; |
120 | 145 | } |
121 | 146 | var useRecognitionModule = !!options.asr; |
122 | | - if (!options.asr) options.asr = {}; |
| 147 | + if (typeof options.asr != "object") options.asr = {}; |
| 148 | + var useVadModule = !!options.vad; |
| 149 | + if (typeof options.vad != "object") options.vad = {}; |
123 | 150 | //audio source |
124 | 151 | var customSource = undefined; |
125 | 152 | if (options.fileUrl){ |
|
172 | 199 | } |
173 | 200 | }; |
174 | 201 | var waveEncoderIndex; |
| 202 | + |
| 203 | + var defaultVadBuffer = 480*2; //480 samples is the 30ms window for WebRTC VAD at 16k - it's a bit "special" |
| 204 | + var vadWorker = { |
| 205 | + name: 'webrtc-vad-worker', //More experimental version: 'sepia-vad-worker' |
| 206 | + type: 'worker', |
| 207 | + settings: { |
| 208 | + onmessage: onVadData, |
| 209 | + options: { |
| 210 | + setup: { |
| 211 | + inputSampleRate: targetSampleRate, |
| 212 | + inputSampleSize: resamplerBufferSize, |
| 213 | + bufferSize: options.vad.bufferSize || defaultVadBuffer, //restrictions apply ^^ |
| 214 | + vadMode: options.vad.mode || 3, |
| 215 | + sequence: { |
| 216 | + silenceActivationTime: 450, //was 250 |
| 217 | + maxSequenceTime: options.vad.maxSequenceTime || 10000, |
| 218 | + minSequenceTime: options.vad.minSequenceTime || 600 |
| 219 | + } |
| 220 | + } |
| 221 | + } |
| 222 | + } |
| 223 | + }; |
| 224 | + var vadWorkerIndex; |
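//A minimal sketch of a 'vad' options object that would activate the module above,
//using only the fields this code reads from 'options.vad'. The consuming create/setup
//call is an assumption and may differ in the actual recorder interface.
var exampleRecorderOptions = {
	vad: {
		mode: 3,                //WebRTC VAD aggressiveness 0-3 (3 = most aggressive), default used above: 3
		bufferSize: 480*2,      //default used above - WebRTC VAD window restrictions apply
		maxSequenceTime: 10000, //ms, default used above: 10000
		minSequenceTime: 600    //ms, default used above: 600
	}
};
//e.g.: SepiaVoiceRecorder.create(exampleRecorderOptions);	//assumed entry point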
175 | 225 |
|
176 | 226 | var sttServerModule = { |
177 | 227 | name: 'stt-socket', |
|
213 | 263 |
|
214 | 264 | //- resampler is required |
215 | 265 | activeModules.push(resampler); |
216 | | - resamplerIndex = activeModules.length; |
| 266 | + resamplerIndex = activeModules.length; |
| 267 | + |
| 268 | + //- use VAD? |
| 269 | + if (useVadModule){ |
| 270 | + activeModules.push(vadWorker); |
| 271 | + vadWorkerIndex = activeModules.length; |
| 272 | + SepiaVoiceRecorder.vadModule = vadWorker; |
| 273 | + resampler.settings.sendToModules.push(vadWorkerIndex); //add to resampler |
| 274 | + } |
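//A minimal sketch mirroring the VAD wiring above: any additional processing module
//would be attached the same way - push it to 'activeModules', record its 1-based
//index (index = activeModules.length right after push), then add that index to the
//resampler's 'sendToModules' so it receives the resampled audio. The module name
//and settings below are hypothetical.
var exampleAnalyzerModule = {
	name: 'example-analyzer-worker',	//hypothetical worker name
	type: 'worker',
	settings: {
		onmessage: function(data){ console.log("example analyzer data:", data); },
		options: { setup: { inputSampleRate: targetSampleRate, inputSampleSize: resamplerBufferSize } }
	}
};
activeModules.push(exampleAnalyzerModule);
var exampleAnalyzerIndex = activeModules.length;	//1-based, same convention as vadWorkerIndex
resampler.settings.sendToModules.push(exampleAnalyzerIndex);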
217 | 275 |
|
218 | 276 | //- use either speech-recognition (ASR) or wave-encoder |
219 | 277 | if (useRecognitionModule){ |
|