Skip to content

Commit f04a5c8

Browse files
committed
added WebRTC VAD worker + module + interface
1 parent c51a7f1 commit f04a5c8

File tree

4 files changed

+310
-0
lines changed

4 files changed

+310
-0
lines changed

src/modules/shared/webrtc-vad-interface.min.js

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

src/modules/shared/webrtc-vad-wasm.js

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

src/modules/webrtc-vad-worker.js

Lines changed: 221 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,221 @@
1+
//imports
2+
importScripts('./shared/common.js');
3+
importScripts('./shared/ring-buffer.min.js');
4+
importScripts('./shared/webrtc-vad-interface.min.js');
5+
importScripts('./shared/webrtc-vad-wasm.js');
6+
7+
//Handle to the loaded WebRTC VAD module (assigned in 'constructWorker', cleared in 'release')
var vadModule;

//Audio worker interface:
//Messages are expected to carry a 'ctrl' object with an 'action' name and,
//depending on the action, 'options' or 'data'. Messages without 'ctrl' are ignored.
onmessage = function(e) {
    //console.log("WebRtcVadWorker onmessage", e.data); //DEBUG
    var ctrl = e.data.ctrl;
    if (!ctrl){
        return;
    }
    var action = ctrl.action;
    if (action === "construct"){
        constructWorker(ctrl.options);
    }else if (action === "process"){
        process(ctrl.data);
    }else if (action === "handle"){
        handleEvent(ctrl.data);
    }else if (action === "start"){
        start(ctrl.options);
    }else if (action === "stop"){
        stop(ctrl.options);
    }else if (action === "reset"){
        reset(ctrl.options);
    }else if (action === "release" || action === "close"){
        //'close' is treated as an alias of 'release'
        release(ctrl.options);
    }else{
        console.log("Unknown control message:", e.data);
    }
};
42+
43+
//--- worker configuration (assigned in 'constructWorker') ---
let inputSampleRate;
let channelCount;
let inputSampleSize;
let processBufferSize;
let vadMode;
let isFloat32Input;     //default false

//--- processing buffers (created in 'init') ---
let _processRingBuffer;
let _vadFrames;         //number of VAD frames that fit into 'processBufferSize'
let _vadBufferSize;     //samples per VAD frame
let _vadBuffer;
let _int16InputBuffer;  //used if input is float32

//true until the first chunk with samples has been seen by 'process'
let _isFirstValidProcess;
57+
58+
//Validate the current configuration and (re-)create the processing buffers.
//Reads: inputSampleSize, processBufferSize, inputSampleRate, channelCount, isFloat32Input.
//Writes: _vadBufferSize, _vadFrames, _processRingBuffer, _vadBuffer, _int16InputBuffer (float32 input only), _isFirstValidProcess.
//Throws a JSON string (stringified BufferSizeException / SampleRateException) on invalid settings.
//NOTE: exceptions need to be strings to show up in worker.onerror properly :-/
function init(){
    if (inputSampleSize > processBufferSize){
        throw JSON.stringify(new BufferSizeException("Processor 'bufferSize' has to be bigger than 'inputSampleSize'! Currently: " + inputSampleSize + " > " + processBufferSize));
    }

    //requirements for sampleRate: 8000, 16000, 32000, 48000 - sampleLength: sampleRate/1000 * (10|20|30) => 48k - 480, 960, 1440 ; 16k - 160, 320, 480;
    const supportedSampleRates = [8000, 16000, 32000, 48000];
    if (!supportedSampleRates.includes(inputSampleRate)){
        throw JSON.stringify(new SampleRateException("For this module sample-rate has to be one of: 8000, 16000, 32000, 48000"));
    }

    //candidate frame sizes for 30, 20 and 10ms - the largest one that divides 'processBufferSize' wins
    const allowedBufferSizes = [inputSampleRate/1000 * 30, inputSampleRate/1000 * 20, inputSampleRate/1000 * 10];
    _vadBufferSize = 0;
    const matchingSize = allowedBufferSizes.find(function(frameSize){
        return processBufferSize % frameSize == 0;
    });
    if (matchingSize === undefined){
        throw JSON.stringify(new BufferSizeException("For sample-rate '" + inputSampleRate + "' the 'bufferSize' has to be equal or a multiple of: " + allowedBufferSizes.join(", ")));
    }
    _vadFrames = processBufferSize / matchingSize;
    _vadBufferSize = matchingSize;

    //ring buffer holds one process buffer plus one input chunk - TODO: check size again
    _processRingBuffer = new RingBuffer(processBufferSize + inputSampleSize, channelCount, "Int16");
    _vadBuffer = [new Int16Array(_vadBufferSize)];
    if (isFloat32Input){
        _int16InputBuffer = [new Int16Array(inputSampleSize)];
    }

    _isFirstValidProcess = true;
}
88+
//Announce that the module is ready by posting 'moduleState: 1' together with
//the active configuration ('moduleInfo') to the owner of this worker.
//The 'skipResampler' argument is currently not used.
function ready(skipResampler){
    var info = {
        inputSampleRate: inputSampleRate,
        channelCount: channelCount,
        inputSampleSize: inputSampleSize,
        inputIsFloat32: isFloat32Input,
        processBufferSize: processBufferSize,
        vadMode: vadModule.getMode(),   //mode as reported by the VAD module
        vadFramesMax: _vadFrames,
        vadBufferSize: _vadBufferSize
    };
    postMessage({
        moduleState: 1,
        moduleInfo: info
    });
}
103+
104+
//Configure the worker from 'options.setup', create the buffers via 'init()' and make
//sure the WebRTC VAD WASM module is available. Calls 'ready()' when done (asynchronously
//if the module has to be loaded first).
//
//Used setup fields:
//  inputSampleRate - falls back to ctxInfo.targetSampleRate, then ctxInfo.sampleRate
//  inputSampleSize - default 512
//  bufferSize      - default: inputSampleSize
//  vadMode         - default 3
//  isFloat32       - default false
//
//Throws whatever 'init()' throws for invalid settings and a 'VadModuleError' object
//if the mode cannot be applied to an already loaded module.
function constructWorker(options) {
    var setup = options.setup;
    //'ctxInfo' is optional - without this guard a missing 'ctxInfo' caused a TypeError
    //instead of the descriptive sample-rate exception from 'init()'
    var ctxInfo = setup.ctxInfo || {};
    inputSampleRate = setup.inputSampleRate || ctxInfo.targetSampleRate || ctxInfo.sampleRate;
    channelCount = 1;   //options.setup.channelCount || 1;  //TODO: only MONO atm
    inputSampleSize = setup.inputSampleSize || 512;
    processBufferSize = setup.bufferSize || inputSampleSize;
    vadMode = (setup.vadMode != undefined)? setup.vadMode : 3;
    isFloat32Input = (setup.isFloat32 != undefined)? setup.isFloat32 : false;
    init();

    function onVadLog(msg){
        console.error("VadModuleLog -", msg);   //DEBUG (use postMessage?)
    }
    function onVadError(msg){
        console.error("VadModuleError -", msg);
        throw {name: "VadModuleError", message: msg};   //TODO: this probably needs to be a string to show up in worker.onerror properly :-/
    }

    //prepare
    if (!vadModule){
        onVadLog("Init. WebRTC VAD WASM module");   //DEBUG
        new WebRtcVoiceActivityDetector({
            onInfo: onVadLog,
            onError: onVadError,
            onStatusMessage: onVadLog,
            mode: vadMode
        }, function(v){
            onVadLog("WebRTC VAD ready");   //DEBUG
            vadModule = v;
            ready();
        });
    }else{
        onVadLog("WebRTC VAD module already loaded");   //DEBUG
        //the module was created with the mode of an earlier 'construct' call, so the
        //requested mode has to be applied again or it would be silently ignored
        if (vadModule.setMode(vadMode) != 0){
            onVadError("Failed to set mode");
        }
        ready();
    }
}
139+
140+
//Feed one chunk of audio into the ring buffer and run the VAD on every complete frame.
//
//data: expected: data.samples (array of channels, only the first one is used - MONO),
//      data.sampleRate, data.channels, data.type
//      might have: data.rms - TODO: make use of?
//
//Posts '{voiceActivity: [...]}' with one result per processed VAD frame (if any).
//Returns true. Data without 'samples' is ignored.
//Throws a JSON string (SampleRateException / ArrayTypeException) if the first chunk with
//samples does not match the configured sample-rate or array type.
//NOTE: exceptions need to be strings to show up in worker.onerror properly :-/
function process(data) {
    if (!data || !data.samples){
        return true;
    }
    //Use 1st input and output only
    var input = data.samples;
    var thisInputSampleSize = input[0].length;

    if (_isFirstValidProcess){
        //the flag is cleared before the checks, so a mismatch is reported only once
        _isFirstValidProcess = false;
        //check: inputSampleRate, float32
        if (data.sampleRate != inputSampleRate){
            var rateMsg = "Sample-rate mismatch! Should be '" + inputSampleRate + "' is '" + data.sampleRate + "'";
            console.error("Audio Worker sample-rate exception - Msg.: " + rateMsg);
            throw JSON.stringify(new SampleRateException(rateMsg));
        }
        var inputArrayType = data.type || input[0].constructor.name;
        var isFloat32 = (inputArrayType.indexOf("Float32") >= 0);
        if (isFloat32 != isFloat32Input){
            var typeMsg = "Array type mismatch! Input samples are of type '" + inputArrayType + "' but expected: " + (isFloat32Input? "Float32" : "Int16");
            console.error("Audio Worker type exception - Msg.: " + typeMsg);
            throw JSON.stringify(new ArrayTypeException(typeMsg));
        }
        //TODO: should we re-init. instead of fail?
    }

    //TODO: is MONO
    if (isFloat32Input){
        //The conversion buffer has to match this chunk exactly: the whole buffer is pushed,
        //so a larger buffer would add stale samples from an earlier chunk and a smaller one
        //would drop samples.
        if (!_int16InputBuffer || _int16InputBuffer[0].length != thisInputSampleSize){
            _int16InputBuffer = [new Int16Array(thisInputSampleSize)];
        }
        //NOTE(review): assumes 'floatTo16BitPCM(output, input)' writes one sample per input sample - confirm in common.js
        CommonConverters.floatTo16BitPCM(_int16InputBuffer[0], input[0]);
        _processRingBuffer.push([_int16InputBuffer[0]]);
    }else{
        _processRingBuffer.push(input);
    }

    //Process if we have enough frames
    var vadResults = [];
    while (_processRingBuffer.framesAvailable >= _vadBufferSize) {
        //pull samples
        _processRingBuffer.pull(_vadBuffer);

        //activity check
        vadResults.push(vadModule.getVoiceActivity(inputSampleRate, _vadBuffer[0]));    //TODO: is MONO
    }
    if (vadResults.length > 0){
        //Send info
        postMessage({
            voiceActivity: vadResults
        });
    }
    return true;
}
195+
196+
//Receives data of 'handle' messages: data that should not be processed
//but might trigger an event. Currently a no-op.
function handleEvent(data){
}

//Called for 'start' messages. Currently a no-op - TODO: anything to do?
//NOTE: timing of this signal is not very well defined, use only for gating or similar stuff!
function start(options) {
}

//Called for 'stop' messages. Currently a no-op - TODO: anything to do?
//NOTE: timing of this signal is not very well defined
function stop(options) {
}
208+
//Called for 'reset' messages: runs 'init()' again with the current settings.
//'options' is not used.
//TODO: clean up worker and prep. for restart
function reset(options) {
    init();
}
212+
//Called for 'release' and 'close' messages: drops the references to all buffers
//and to the VAD module. 'options' is not used.
function release(options){
    //destroy
    _processRingBuffer = _vadBuffer = _int16InputBuffer = vadModule = null;
}
219+
220+
//--- helpers ---
221+
//...
Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,87 @@
1+
//WebRTC VAD Interface for SEPIA Web-Audio Processor
2+
//Constructor: loads the WebRTC VAD WASM module (via the global 'WebRtcVad' factory) and
//builds the 'Vad' interface object around it.
//
//options (all optional):
//  onInfo | onPrint            - function(text) for module output (default: console.log)
//  onError | onPrintErr        - function(text) for module errors (default: console.log)
//  onStatusMessage | onSetStatus - function(text) for status messages (default: console.log)
//  mode                        - 0=Quality mode, 1=Low bitrate mode, 2=Aggressive mode, 3=Very aggressive mode (default)
//initCallback: function(vad), called once the module is initialized. 'vad' offers:
//  wasmModule, setMode(mode), getMode(), process(...) and getVoiceActivity(sampleRate, samplesInt16)
//
//After initialization the same object is available as 'this.vadModule'.
//Failures of 'main()' or 'setMode' are reported via the error handler and reject the internal promise.
var WebRtcVoiceActivityDetector = function(options, initCallback){
    //the promise callbacks below are plain functions, so 'this' is not the instance in there
    var self = this;
    this.vadModule = {};
    this.options = options || {};

    //NOTE: the defaults have to be functions (not the result of a 'console.log' call)
    var onPrint = this.options.onInfo || this.options.onPrint || function(text){ console.log("VAD print", text); };
    var onError = this.options.onError || this.options.onPrintErr || function(text){ console.log("VAD printErr", text); };
    var onStatus = this.options.onStatusMessage || this.options.onSetStatus || function(text){ console.log("VAD setStatus", text); };
    var vadMode = (this.options.mode != undefined)? this.options.mode : 3;    //modes: 0=Quality mode, 1=Low bitrate mode, 2=Aggressive mode, 3=Very aggressive mode

    WebRtcVad({
        preRun: [],
        postRun: [],
        print: function(text){
            if (arguments.length > 1) text = Array.prototype.slice.call(arguments).join(' ');
            onPrint(text);
        },
        printErr: function(text){
            if (arguments.length > 1) text = Array.prototype.slice.call(arguments).join(' ');
            onError(text);
        },
        setStatus: function(text){
            if (arguments.length > 1) text = Array.prototype.slice.call(arguments).join(' ');
            onStatus(text);
        },
        totalDependencies: 0,
        monitorRunDependencies: function(left){
            this.totalDependencies = Math.max(this.totalDependencies, left);
            this.setStatus(left ? 'Preparing... (' + (this.totalDependencies - left) + '/' + this.totalDependencies + ')' : 'All downloads complete.');
        },
        noInitialRun: true
    })
    .then(function(Module){
        var Vad = {
            wasmModule: Module
            //setMode: ...,
            //getMode: ...,
            //process: ...,
            //getVoiceActivity: ...
        };

        //setup

        var main = Module.cwrap('main');
        if (main() != 1){
            onError("VAD 'main' error");
            throw {name: "VadModuleError", message: "Failed to initialize via 'main()'"};
        }

        var setModeNative = Module.cwrap('setmode', 'number', ['number']);
        //returns the result of the native call (0 is treated as success) and remembers
        //the mode on success, so that 'getMode' stays in sync
        Vad.setMode = function(mode){
            var result = setModeNative(mode);
            if (result == 0){
                vadMode = mode;
            }
            return result;
        };
        Vad.getMode = function(){ return vadMode; };

        if (Vad.setMode(vadMode) != 0){
            onError("VAD 'setMode' error");
            throw {name: "VadModuleError", message: "Failed to set mode"};
        }
        onPrint("VAD mode: " + vadMode);

        Vad.process = Module.cwrap('process_data', 'number', ['number', 'number', 'number', 'number', 'number', 'number']);
        //arguments: (dataHeap.byteOffset, sampleLength, sampleRate, samples[0], samples[100], samples[2000])
        //requirements for sampleRate: 8000, 16000, 32000, 48000 - sampleLength: sampleRate/1000 * (10|20|30) => 48k - 480, 960, 1440 ; 16k - 160, 320, 480 ;

        //Run the VAD on one frame of Int16 samples.
        //Returns -1: ERROR, 0: No active speech, 1: Active speech (core function gives 1-6 but output here is 1 for all)
        Vad.getVoiceActivity = function(sampleRate, samplesInt16){
            //Get data byte size, allocate memory on Emscripten heap, and get pointer
            var nDataBytes = samplesInt16.length * samplesInt16.BYTES_PER_ELEMENT;
            var dataPtr = Module._malloc(nDataBytes);
            try {
                //Copy data to Emscripten heap (directly accessed from Module.HEAPU8).
                //Only the bytes of this view are copied: 'samplesInt16' can be a sub-array
                //of a larger buffer, so offset and length have to be respected.
                var dataHeap = new Uint8Array(Module.HEAPU8.buffer, dataPtr, nDataBytes);
                dataHeap.set(new Uint8Array(samplesInt16.buffer, samplesInt16.byteOffset, nDataBytes));

                //Call function and get result
                return Vad.process(dataPtr, samplesInt16.length, sampleRate,
                    samplesInt16[0], samplesInt16[100], samplesInt16[2000]);
            } finally {
                //Free memory - even if the call above throws
                Module._free(dataPtr);
            }
        };

        return Vad;
    })
    .then(function(vad){
        self.vadModule = vad;
        if (initCallback) initCallback(self.vadModule);
    });
    //Module.setStatus('Downloading...');
};

0 commit comments

Comments
 (0)