@@ -43,15 +43,34 @@ onmessage = function(e) {
4343let inputSampleRate ;
4444let channelCount ;
4545let inputSampleSize ;
46- let processBufferSize ;
46+ let processBufferSize ; //defines '_processRingBuffer' size together with 'inputSampleSize'
4747let vadMode ;
4848let isFloat32Input ; //default false
4949
50- let _processRingBuffer ;
51- let _vadFrames ;
52- let _vadBufferSize ;
50+ let voiceEnergy ;
51+ let voiceEnergyCap = 50 ;
52+ let voiceEnergyDropRate = 2 ;
53+ let _samplesToTimeMsFactor ;
54+
55+ let _processRingBuffer ; //holds multiple vadFrames
56+ let _vadFrames ; //each frame processes one chunk of '_vadBufferSize' as long as '_processRingBuffer' has enough samples
57+ let _vadFrameTimeMs ; //real time (ms) of one vadFrame (defined by sample-rate and buffer size)
58+ let _vadBufferSize ; //size of a single vadFrame (restrictions apply)
5359let _vadBuffer ;
54- let _int16InputBuffer ; //used if input is float32
60+ let _int16InputBuffer ; //used only if input is float32
61+
62+ //sequence control
63+ let useSequenceAnalyzer = false ;
64+ let voiceActivationTime ;
65+ let voiceResetTime ;
66+ let silenceActivationTime ;
67+ let maxSequenceTime ;
68+ let minSequenceTime ;
69+
70+ let _sequenceVoiceTime ;
71+ let _sequenceSilenceTime ;
72+ let _sequenceSawVoice , _sequenceSawSilenceAfterVoice , _sequenceFinishedVoice ;
73+ let _sequenceIsActive , _sequenceIsDone , _sequenceStartedAt ;
5574
5675let _isFirstValidProcess ;
5776
@@ -83,6 +102,11 @@ function init(){
83102 _int16InputBuffer = [ new Int16Array ( inputSampleSize ) ] ;
84103 }
85104
105+ _samplesToTimeMsFactor = 1000 / inputSampleRate ;
106+ _vadFrameTimeMs = Math . round ( _vadBufferSize * _samplesToTimeMsFactor ) ;
107+
108+ resetSequence ( ) ;
109+
86110 _isFirstValidProcess = true ;
87111}
88112function ready ( skipResampler ) {
@@ -96,7 +120,11 @@ function ready(skipResampler){
96120 processBufferSize : processBufferSize ,
97121 vadMode : vadModule . getMode ( ) ,
98122 vadFramesMax : _vadFrames ,
99- vadBufferSize : _vadBufferSize
123+ vadBufferSize : _vadBufferSize ,
124+ vadFrameTimeMs : _vadFrameTimeMs ,
125+ voiceEnergyCap : voiceEnergyCap ,
126+ voiceEnergyDropRate : voiceEnergyDropRate ,
127+ useSequenceAnalyzer : useSequenceAnalyzer
100128 }
101129 } ) ;
102130}
@@ -108,6 +136,18 @@ function constructWorker(options) {
108136 processBufferSize = options . setup . bufferSize || inputSampleSize ;
109137 vadMode = ( options . setup . vadMode != undefined ) ? options . setup . vadMode : 3 ;
110138 isFloat32Input = ( options . setup . isFloat32 != undefined ) ? options . setup . isFloat32 : false ;
139+ if ( options . setup . voiceEnergyCap != undefined ) voiceEnergyCap = options . setup . voiceEnergyCap ;
140+ if ( options . setup . voiceEnergyDropRate ) voiceEnergyDropRate = options . setup . voiceEnergyDropRate ;
141+ if ( options . setup . sequence ) {
142+ useSequenceAnalyzer = true ;
143+ voiceActivationTime = options . setup . sequence . voiceActivationTime || 250 ;
144+ voiceResetTime = options . setup . sequence . voiceResetTime || 1500 ;
145+ silenceActivationTime = options . setup . sequence . silenceActivationTime || 250 ;
146+ maxSequenceTime = options . setup . sequence . maxSequenceTime || 6000 ;
147+ minSequenceTime = options . setup . sequence . minSequenceTime || 600 ;
148+ } else {
149+ useSequenceAnalyzer = false ;
150+ }
111151 init ( ) ;
112152
113153 function onVadLog ( msg ) {
@@ -137,6 +177,85 @@ function constructWorker(options) {
137177 }
138178}
139179
180+ //sequence block
/**
 * Advance the voice-sequence state machine by one VAD frame.
 *
 * Each call accounts for '_vadFrameTimeMs' of audio. Depending on the
 * accumulated voice/silence time the function reports these events via
 * 'registerEvent':
 *   1 'voice_start', 2 'sequence_started', 3 'finished_voice',
 *   4 'finished_voice_maxtime', 5 'sequence_complete'.
 * When a sequence is done, all sequence state is cleared with 'resetSequence'.
 *
 * @param {number} voiceActivity - VAD result for the frame; anything that
 *     loosely equals 0 counts as silence, everything else as voice.
 */
function sequenceDetector(voiceActivity){
	var frameIsSilent = (voiceActivity == 0);

	//step 1: update the voice/silence timers for this frame
	if (!frameIsSilent){
		_sequenceVoiceTime += _vadFrameTimeMs;
		if (_sequenceVoiceTime > voiceActivationTime){
			if (_sequenceSawVoice){
				//enough continuous voice: forget any silence seen so far
				_sequenceSilenceTime = 0;
			}else{
				_sequenceSawVoice = true;
				registerEvent(1, 'voice_start');
			}
		}
	}else if (_sequenceSawVoice){
		//silence is only tracked once voice has been seen
		_sequenceSilenceTime += _vadFrameTimeMs;
		if (_sequenceSilenceTime > voiceResetTime){
			_sequenceSawSilenceAfterVoice = true;
		}else if (_sequenceSilenceTime > silenceActivationTime){
			//enough silence: restart the voice timer
			_sequenceVoiceTime = 0;
		}
	}

	//step 2: either mark the voice part as finished or activate the sequence
	if (_sequenceSawVoice){
		if (_sequenceSawSilenceAfterVoice){
			_sequenceFinishedVoice = true;
		}else if (!_sequenceIsActive && _sequenceVoiceTime > minSequenceTime){
			_sequenceIsActive = true;
			_sequenceStartedAt = Date.now();
			registerEvent(2, 'sequence_started');
		}
	}

	//step 3: check the end conditions (voice finished or max. time exceeded)
	if (_sequenceFinishedVoice){
		_sequenceIsDone = true;
		registerEvent(3, 'finished_voice');
	}else if (_sequenceSawVoice && _sequenceIsActive
			&& (Date.now() - _sequenceStartedAt) > maxSequenceTime){
		_sequenceIsDone = true;
		registerEvent(4, 'finished_voice_maxtime');
	}

	//step 4: wrap up
	if (!_sequenceIsDone){
		return;
	}
	if (_sequenceIsActive){
		//'sequence_complete' is only sent for sequences that were actually started
		registerEvent(5, 'sequence_complete');
	}
	resetSequence();
}
/**
 * Put the sequence analyzer back into its initial state:
 * all flags are cleared and all timers/counters (including 'voiceEnergy')
 * are set to zero.
 */
function resetSequence(){
	//flags
	_sequenceSawVoice = _sequenceSawSilenceAfterVoice = _sequenceFinishedVoice = false;
	_sequenceIsActive = _sequenceIsDone = false;
	//timers and counters
	_sequenceVoiceTime = _sequenceSilenceTime = 0;
	_sequenceStartedAt = 0;
	voiceEnergy = 0;
}
/**
 * Build a sequence-event message and send it with 'postMessage'.
 *
 * The message always contains 'vadSequenceCode' and 'vadSequenceMsg'.
 * For code 5 (sequence complete) it additionally contains
 * 'vadSequenceStarted' (value of '_sequenceStartedAt') and
 * 'vadSequenceEnded' (current 'Date.now()').
 *
 * @param {number} code - event code (1: voice start, 2: sequence start,
 *     3/4: finished voice, 5: sequence complete)
 * @param {string} msg - event label sent as 'vadSequenceMsg'
 * @param {*} [data] - currently unused; kept so existing callers stay valid
 */
function registerEvent(code, msg, data){
	//NOTE: use a dedicated name for the payload instead of re-declaring the 'msg' parameter
	var eventMsg = {
		vadSequenceCode: code,
		vadSequenceMsg: msg
	};
	switch (code){
		//case 1: voice start
		//case 2: sequence start
		//case 3: case 4: finished voice
		case 5:
			//sequence complete: add start and end time
			eventMsg.vadSequenceStarted = _sequenceStartedAt;
			eventMsg.vadSequenceEnded = Date.now();
			break;
		default:
			break;
	}
	//Send info
	postMessage(eventMsg);
}
258+
140259function process ( data ) {
141260 //expected: data.samples, data.sampleRate, data.channels, data.type
142261 //might have: data.rms - TODO: make use of?
@@ -182,11 +301,24 @@ function process(data) {
182301 //activity check
183302 var voiceActivity = vadModule . getVoiceActivity ( inputSampleRate , _vadBuffer [ 0 ] ) ; //TODO: is MONO
184303 vadResults . push ( voiceActivity ) ;
304+
305+ //voice energy and sequence check
306+ if ( voiceActivity ) {
307+ voiceEnergy ++ ;
308+ if ( voiceEnergyCap && voiceEnergy > voiceEnergyCap ) voiceEnergy = voiceEnergyCap ;
309+ } else {
310+ voiceEnergy = voiceEnergy - voiceEnergyDropRate ;
311+ if ( voiceEnergy < 0 ) voiceEnergy = 0 ;
312+ }
313+ if ( useSequenceAnalyzer ) {
314+ sequenceDetector ( voiceActivity ) ;
315+ }
185316 }
186317 if ( vadResults . length > 0 ) {
187318 //Send info
188319 postMessage ( {
189320 voiceActivity : vadResults ,
321+ voiceEnergy : voiceEnergy
190322 } ) ;
191323 }
192324 }
@@ -200,6 +332,7 @@ function handleEvent(data){
/**
 * Handle the 'start' signal by clearing the sequence analyzer state.
 *
 * @param {*} options - not used at the moment
 */
function start(options){
	//NOTE: the timing of this signal is not very well defined, use it only for gating or similar stuff!
	//TODO: anything else to do here?
	resetSequence();
}
204337function stop ( options ) {
205338 //TODO: anything to do?
0 commit comments