Skip to content

Commit 43a5efe

Browse files
committed
more VAD worker options and sequence check
1 parent fbff9ec commit 43a5efe

File tree

3 files changed

+205
-15
lines changed

3 files changed

+205
-15
lines changed

src/modules/webrtc-vad-worker.js

Lines changed: 139 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -43,15 +43,34 @@ onmessage = function(e) {
4343
let inputSampleRate;
4444
let channelCount;
4545
let inputSampleSize;
46-
let processBufferSize;
46+
let processBufferSize; //defines '_processRingBuffer' size together with 'inputSampleSize'
4747
let vadMode;
4848
let isFloat32Input; //default false
4949

50-
let _processRingBuffer;
51-
let _vadFrames;
52-
let _vadBufferSize;
50+
let voiceEnergy;
51+
let voiceEnergyCap = 50;
52+
let voiceEnergyDropRate = 2;
53+
let _samplesToTimeMsFactor;
54+
55+
let _processRingBuffer; //holds multiple vadFrames
56+
let _vadFrames; //each frame processes one chunk of '_vadBufferSize' as long as '_processRingBuffer' has enough samples
57+
let _vadFrameTimeMs; //real time (ms) of one vadFrame (defined by sample-rate and buffer size)
58+
let _vadBufferSize; //size of a single vadFrame (restrictions apply)
5359
let _vadBuffer;
54-
let _int16InputBuffer; //used if input is float32
60+
let _int16InputBuffer; //used only if input is float32
61+
62+
//sequence control
63+
let useSequenceAnalyzer = false;
64+
let voiceActivationTime;
65+
let voiceResetTime;
66+
let silenceActivationTime;
67+
let maxSequenceTime;
68+
let minSequenceTime;
69+
70+
let _sequenceVoiceTime;
71+
let _sequenceSilenceTime;
72+
let _sequenceSawVoice, _sequenceSawSilenceAfterVoice, _sequenceFinishedVoice;
73+
let _sequenceIsActive, _sequenceIsDone, _sequenceStartedAt;
5574

5675
let _isFirstValidProcess;
5776

@@ -83,6 +102,11 @@ function init(){
83102
_int16InputBuffer = [new Int16Array(inputSampleSize)];
84103
}
85104

105+
_samplesToTimeMsFactor = 1000/inputSampleRate;
106+
_vadFrameTimeMs = Math.round(_vadBufferSize * _samplesToTimeMsFactor);
107+
108+
resetSequence();
109+
86110
_isFirstValidProcess = true;
87111
}
88112
function ready(skipResampler){
@@ -96,7 +120,11 @@ function ready(skipResampler){
96120
processBufferSize: processBufferSize,
97121
vadMode: vadModule.getMode(),
98122
vadFramesMax: _vadFrames,
99-
vadBufferSize: _vadBufferSize
123+
vadBufferSize: _vadBufferSize,
124+
vadFrameTimeMs: _vadFrameTimeMs,
125+
voiceEnergyCap: voiceEnergyCap,
126+
voiceEnergyDropRate: voiceEnergyDropRate,
127+
useSequenceAnalyzer: useSequenceAnalyzer
100128
}
101129
});
102130
}
@@ -108,6 +136,18 @@ function constructWorker(options) {
108136
processBufferSize = options.setup.bufferSize || inputSampleSize;
109137
vadMode = (options.setup.vadMode != undefined)? options.setup.vadMode : 3;
110138
isFloat32Input = (options.setup.isFloat32 != undefined)? options.setup.isFloat32 : false;
139+
if (options.setup.voiceEnergyCap != undefined) voiceEnergyCap = options.setup.voiceEnergyCap;
140+
if (options.setup.voiceEnergyDropRate) voiceEnergyDropRate = options.setup.voiceEnergyDropRate;
141+
if (options.setup.sequence){
142+
useSequenceAnalyzer = true;
143+
voiceActivationTime = options.setup.sequence.voiceActivationTime || 250;
144+
voiceResetTime = options.setup.sequence.voiceResetTime || 1500;
145+
silenceActivationTime = options.setup.sequence.silenceActivationTime || 250;
146+
maxSequenceTime = options.setup.sequence.maxSequenceTime || 6000;
147+
minSequenceTime = options.setup.sequence.minSequenceTime || 600;
148+
}else{
149+
useSequenceAnalyzer = false;
150+
}
111151
init();
112152

113153
function onVadLog(msg){
@@ -137,6 +177,85 @@ function constructWorker(options) {
137177
}
138178
}
139179

180+
//sequence block
181+
/**
 * Sequence analyzer: consumes the result of one VAD frame and advances the
 * voice/silence state machine. Emits events via 'registerEvent':
 * 1: voice start, 2: sequence started, 3: finished voice,
 * 4: finished voice (max. time), 5: sequence complete.
 * When a sequence is done the state is cleared with 'resetSequence'.
 *
 * @param {number} voiceActivity - VAD result of the current frame (0 is treated as silence)
 */
function sequenceDetector(voiceActivity){
	//--- step 1: book the frame time as voice or silence ---
	if (voiceActivity != 0){
		_sequenceVoiceTime += _vadFrameTimeMs;
		if (_sequenceVoiceTime > voiceActivationTime){
			if (_sequenceSawVoice){
				//stable voice again: silence counter starts over
				_sequenceSilenceTime = 0;
			}else{
				//first time we have enough voice
				_sequenceSawVoice = true;
				registerEvent(1, 'voice_start');
			}
		}
	}else if (_sequenceSawVoice){
		//silence only counts after voice has been seen
		_sequenceSilenceTime += _vadFrameTimeMs;
		if (_sequenceSilenceTime > voiceResetTime){
			_sequenceSawSilenceAfterVoice = true;
		}else if (_sequenceSilenceTime > silenceActivationTime){
			//pause long enough to restart the voice counter
			_sequenceVoiceTime = 0;
		}
	}

	//--- step 2: finished voice or sequence start? ---
	if (_sequenceSawVoice){
		if (_sequenceSawSilenceAfterVoice){
			_sequenceFinishedVoice = true;
		}else if (_sequenceVoiceTime > minSequenceTime && !_sequenceIsActive){
			_sequenceIsActive = true;
			_sequenceStartedAt = Date.now();
			registerEvent(2, 'sequence_started');
		}
	}

	//--- step 3: end conditions (voice finished or max. time of active sequence exceeded) ---
	if (_sequenceFinishedVoice){
		_sequenceIsDone = true;
		registerEvent(3, 'finished_voice');

	}else if (_sequenceSawVoice && _sequenceIsActive
			&& ((Date.now() - _sequenceStartedAt) > maxSequenceTime)){
		_sequenceIsDone = true;
		registerEvent(4, 'finished_voice_maxtime');
	}

	//--- step 4: wrap up and clear state ---
	if (_sequenceIsDone){
		//'sequence_complete' is only sent if the sequence actually became active
		if (_sequenceIsActive) registerEvent(5, 'sequence_complete');
		resetSequence();
	}
}
227+
/**
 * Clear the voice energy and all sequence-analyzer state
 * (flags, accumulated voice/silence time and sequence start time).
 */
function resetSequence(){
	//counters and timestamps
	voiceEnergy = _sequenceVoiceTime = _sequenceSilenceTime = _sequenceStartedAt = 0;
	//voice/silence flags
	_sequenceSawVoice = _sequenceFinishedVoice = _sequenceSawSilenceAfterVoice = false;
	//sequence state flags
	_sequenceIsActive = _sequenceIsDone = false;
}
238+
/**
 * Send a sequence-analyzer event to the owner of this worker via 'postMessage'.
 *
 * Event codes used by the sequence detector:
 * 1: voice start, 2: sequence started, 3: finished voice,
 * 4: finished voice (max. time), 5: sequence complete.
 *
 * @param {number} code - event code, sent as 'vadSequenceCode'
 * @param {string} msg - event name, sent as 'vadSequenceMsg'
 * @param {*} [data] - currently not used, kept for interface compatibility
 */
function registerEvent(code, msg, data){
	//NOTE: the payload gets its own variable - re-declaring the 'msg' parameter
	//only worked because 'var' tolerates re-declaration and is easy to break.
	const eventMsg = {
		vadSequenceCode: code,
		vadSequenceMsg: msg
	};
	if (code === 5){
		//sequence complete: add start and end time (ms timestamps)
		eventMsg.vadSequenceStarted = _sequenceStartedAt;
		eventMsg.vadSequenceEnded = Date.now();
	}
	//codes 1-4 (voice start, sequence start, finished voice) carry no extra data
	//Send info
	postMessage(eventMsg);
}
258+
140259
function process(data) {
141260
//expected: data.samples, data.sampleRate, data.channels, data.type
142261
//might have: data.rms - TODO: make use of?
@@ -182,11 +301,24 @@ function process(data) {
182301
//activity check
183302
var voiceActivity = vadModule.getVoiceActivity(inputSampleRate, _vadBuffer[0]); //TODO: is MONO
184303
vadResults.push(voiceActivity);
304+
305+
//voice energy and sequence check
306+
if (voiceActivity){
307+
voiceEnergy++;
308+
if (voiceEnergyCap && voiceEnergy > voiceEnergyCap) voiceEnergy = voiceEnergyCap;
309+
}else{
310+
voiceEnergy = voiceEnergy - voiceEnergyDropRate;
311+
if (voiceEnergy < 0) voiceEnergy = 0;
312+
}
313+
if (useSequenceAnalyzer){
314+
sequenceDetector(voiceActivity);
315+
}
185316
}
186317
if (vadResults.length > 0){
187318
//Send info
188319
postMessage({
189320
voiceActivity: vadResults,
321+
voiceEnergy: voiceEnergy
190322
});
191323
}
192324
}
@@ -200,6 +332,7 @@ function handleEvent(data){
200332
function start(options) {
201333
//TODO: anything to do?
202334
//NOTE: timing of this signal is not very well defined, use only for gating or similar stuff!
335+
resetSequence();
203336
}
204337
function stop(options) {
205338
//TODO: anything to do?

test-commons.js

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ function addWaveToPage(wavAudio, targetEle){
2626
targetEle.appendChild(audioBox);
2727
}
2828

29+
uPlot.lazy.colorPalette[0] = "#ceff1a"; //default color for first line in graph
2930
var fixedPlots = {
3031
1: {
3132
graph: (new uPlot.lazy.AutoSeries(document.getElementById('chart1'), 150, {
@@ -48,7 +49,15 @@ var fixedPlots = {
4849
yRange: [-0.1, 1.1]
4950
})),
5051
use: (document.getElementById("usePlot3")? document.getElementById("usePlot3").checked : true)
51-
}
52+
},
53+
4: {
54+
graph: (new uPlot.lazy.AutoSeries(document.getElementById('chart4'), 150, {
55+
rememberMax: true
56+
}, {
57+
fill: ["#ceff1a1a"]
58+
})),
59+
use: (document.getElementById("usePlot4")? document.getElementById("usePlot4").checked : true)
60+
},
5261
}
5362
function usePlot(index, ele){
5463
var p = fixedPlots[index];
@@ -58,7 +67,6 @@ function usePlot(index, ele){
5867
if (container) container.style.display = p.use? "" : "none";
5968
}
6069
}
61-
uPlot.lazy.colorPalette[0] = "#ceff1a"; //default color for first line in graph
6270

6371
function addChartContainerToPage(){
6472
var ele = document.createElement("div");

test.html

Lines changed: 56 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -106,17 +106,19 @@ <h1>SEPIA Web Audio Processor</h1>
106106
</div>
107107
<div class="contorlGroup">
108108
<label><b>Switches:</b></label>
109-
<label>Use volume-proc.:</label><input type="checkbox" id="useVolumeProcessor" checked onchange="useVolumeProcessor(this.checked);">
110-
<label>Use resampler:</label><input type="checkbox" id="useResampler" checked onchange="useResampler(this.checked);">
111-
<label>Use wave-encoder:</label><input type="checkbox" id="useWaveEncoder" checked onchange="useWaveEncoder(this.checked);">
112-
<label>Use VAD:</label><input type="checkbox" id="useVadWorker" checked onchange="useVadWorker(this.checked);">
109+
<label>Volume-proc.:</label><input type="checkbox" id="useVolumeProcessor" checked onchange="useVolumeProcessor(this.checked);">
110+
<label>Resampler:</label><input type="checkbox" id="useResampler" checked onchange="useResampler(this.checked);">
111+
<label>Wave-encoder:</label><input type="checkbox" id="useWaveEncoder" checked onchange="useWaveEncoder(this.checked);">
112+
<label>VAD:</label><input type="checkbox" id="useVadWorker" checked onchange="useVadWorker(this.checked);">
113113
<label>Plot 1:</label><input type="checkbox" id="usePlot1" checked onchange="usePlot(1, this);">
114114
<label>Plot 2:</label><input type="checkbox" id="usePlot2" checked onchange="usePlot(2, this);">
115115
<label>Plot 3:</label><input type="checkbox" id="usePlot3" checked onchange="usePlot(3, this);">
116+
<label>Plot 4:</label><input type="checkbox" id="usePlot4" checked onchange="usePlot(4, this);">
116117
</div>
117118
<div id="chart1" class="chart"></div>
118119
<div id="chart2" class="chart"></div>
119120
<div id="chart3" class="chart"></div>
121+
<div id="chart4" class="chart"></div>
120122
</div>
121123
<script type='text/javascript' src="test-commons.js"></script>
122124
<script type='text/javascript'>
@@ -125,6 +127,7 @@ <h1>SEPIA Web Audio Processor</h1>
125127
var targetSampleRateSelector = document.getElementById("sampleRateSelector");
126128
targetSampleRateSelector.onchange = function(){
127129
targetSampleRate = +targetSampleRateSelector.value;
130+
setResamplerSampleRate(targetSampleRateSelector, true);
128131
};
129132
var sourceSelector = document.getElementById("sourceTypeSelector");
130133
var logElement = document.getElementById('logMessages');
@@ -319,7 +322,16 @@ <h1>SEPIA Web Audio Processor</h1>
319322
inputSampleRate: (doUseResampler? resamplerSampleRate : 0), //output sampleRate of previous module, if undefined take targetSampleRate or inputSampleRate
320323
inputSampleSize: resamplerBufferSize, //output bufferSize of previous module
321324
bufferSize: vadWorkerBufferSize,
322-
vadMode: vadWorkerVadMode
325+
vadMode: vadWorkerVadMode,
326+
//voiceEnergyCap: 50,
327+
//voiceEnergyDropRate: 2,
328+
sequence: {
329+
//voiceActivationTime: 250,
330+
//voiceResetTime: 1500,
331+
//silenceActivationTime: 250,
332+
maxSequenceTime: 6000,
333+
minSequenceTime: 600
334+
}
323335
}
324336
}
325337
}
@@ -532,7 +544,32 @@ <h1>SEPIA Web Audio Processor</h1>
532544
if (data.voiceActivity != undefined){
533545
plotData(data.voiceActivity, 3);
534546
}
547+
if (data.voiceEnergy != undefined){
548+
plotData(data.voiceEnergy, 4);
549+
}
550+
if (data.vadSequenceCode != undefined){
551+
console.log("VAD sequence event: " + data.vadSequenceMsg);
552+
//1: voice start, 2: sequence started, 3: voice finished, 4: voice finished max. time, 5: full sequence complete
553+
if (data.vadSequenceCode == 2 && doUseWaveEncoder && vadAutoActivate){
554+
vadAutoActivate = false;
555+
vadAutoActivateSequenceWasTrigger = false;
556+
waveEncoderSetGate('open');
557+
}
558+
if (waveEncoderGateOpen && data.vadSequenceCode == 5){
559+
if ((data.vadSequenceEnded - data.vadSequenceStarted) > vadMinRecordTimeAfterActivate){
560+
waveEncoderSetGate('close');
561+
}else{
562+
vadAutoActivateSequenceWasTrigger = true;
563+
}
564+
}
565+
if (waveEncoderGateOpen && vadAutoActivateSequenceWasTrigger && (data.vadSequenceCode == 3 || data.vadSequenceCode == 4 )){
566+
waveEncoderSetGate('close');
567+
}
568+
}
535569
}
570+
var vadAutoActivate = true;
571+
var vadAutoActivateSequenceWasTrigger = false;
572+
var vadMinRecordTimeAfterActivate = 3000;
536573

537574
var doUseVolumeProcessor = document.getElementById("useVolumeProcessor").checked;
538575
var volMeterOut = document.getElementById('volumeMeterNodeOutput');
@@ -581,8 +618,14 @@ <h1>SEPIA Web Audio Processor</h1>
581618
resamplerGainShow.value = resamplerGain.value;
582619
resamplerGain.oninput = function(){ resamplerGainShow.value = resamplerGain.value; };
583620
resamplerGainShow.oninput = function(){ resamplerGain.value = resamplerGainShow.value; };
584-
function setResamplerSampleRate(inputEle){
585-
resamplerSampleRate = +inputEle.value;
621+
/**
 * Take the resampler sample-rate from an input element.
 *
 * @param {HTMLElement} inputEle - element whose 'value' holds the sample-rate
 * @param {boolean} [updateUi] - if true, fall back to 16000 for an empty/zero/invalid
 *     value and write the result back to the 'resamplerSampleRate' input
 */
function setResamplerSampleRate(inputEle, updateUi){
	var requestedRate = +inputEle.value;
	console.log('setResamplerSampleRate', requestedRate, updateUi);
	if (!updateUi){
		//plain update: take the value as it is
		resamplerSampleRate = requestedRate;
		return;
	}
	//update triggered from another control: apply default and sync the UI field
	resamplerSampleRate = requestedRate || 16000;
	document.getElementById("resamplerSampleRate").value = resamplerSampleRate;
}
587630
function setResamplerBufferSize(inputEle){
588631
resamplerBufferSize = +inputEle.value;
@@ -611,10 +654,16 @@ <h1>SEPIA Web Audio Processor</h1>
611654
var waveEncoderLookback = +document.getElementById("waveEncoderLookback").value; //e.g. 0;
612655
var waveEncoderRecordMax = +document.getElementById("waveEncoderRecordMax").value; //e.g. 6000;
613656
var waveEncoderGate = document.getElementById('waveEncoderGateState');
657+
var waveEncoderGateOpen = false;
614658
/**
 * Set the gate of the wave-encoder module and mirror the state in the page.
 * Sends '{gate: state}' to the module, uses the state as class name of the
 * gate indicator and tracks whether the gate is open. Whenever the gate is
 * not open, VAD auto-activation is armed again.
 *
 * @param {string} state - gate state, "open" marks the gate as open
 */
function waveEncoderSetGate(state){
	window.waveEncoder.handle.sendToModule({gate: state});
	//indicator shows the state via class name (textContent is not used)
	waveEncoderGate.className = state;
	waveEncoderGateOpen = (state == "open");
	if (!waveEncoderGateOpen){
		//gate closed: allow the next VAD sequence to open it again
		vadAutoActivate = true;
	}
	console.log("waveEncoderSetGate", state);
}
619668
function waveEncoderGetWave(){
620669
window.waveEncoder.handle.sendToModule({request: {get: "wave"}});

0 commit comments

Comments
 (0)