@@ -260,26 +260,30 @@ export class VADStream extends baseStream {
260260 pubSilenceDuration += windowDuration ;
261261 }
262262
263- this . outputWriter . write ( {
264- type : VADEventType . INFERENCE_DONE ,
265- samplesIndex : pubCurrentSample ,
266- timestamp : pubTimestamp ,
267- silenceDuration : pubSilenceDuration ,
268- speechDuration : pubSpeechDuration ,
269- probability : p ,
270- inferenceDuration,
271- frames : [
272- new AudioFrame (
273- inputFrame . data . subarray ( 0 , toCopyInt ) ,
274- this . #inputSampleRate,
275- 1 ,
276- toCopyInt ,
277- ) ,
278- ] ,
279- speaking : pubSpeaking ,
280- rawAccumulatedSilence : silenceThresholdDuration ,
281- rawAccumulatedSpeech : speechThresholdDuration ,
282- } ) ;
263+ if (
264+ ! this . sendVADEvent ( {
265+ type : VADEventType . INFERENCE_DONE ,
266+ samplesIndex : pubCurrentSample ,
267+ timestamp : pubTimestamp ,
268+ silenceDuration : pubSilenceDuration ,
269+ speechDuration : pubSpeechDuration ,
270+ probability : p ,
271+ inferenceDuration,
272+ frames : [
273+ new AudioFrame (
274+ inputFrame . data . subarray ( 0 , toCopyInt ) ,
275+ this . #inputSampleRate,
276+ 1 ,
277+ toCopyInt ,
278+ ) ,
279+ ] ,
280+ speaking : pubSpeaking ,
281+ rawAccumulatedSilence : silenceThresholdDuration ,
282+ rawAccumulatedSpeech : speechThresholdDuration ,
283+ } )
284+ ) {
285+ continue ;
286+ }
283287
284288 const resetWriteCursor = ( ) => {
285289 if ( ! this . #speechBuffer) throw new Error ( 'speechBuffer is empty' ) ;
@@ -314,19 +318,23 @@ export class VADStream extends baseStream {
314318 pubSilenceDuration = 0 ;
315319 pubSpeechDuration = speechThresholdDuration ;
316320
317- this . outputWriter . write ( {
318- type : VADEventType . START_OF_SPEECH ,
319- samplesIndex : pubCurrentSample ,
320- timestamp : pubTimestamp ,
321- silenceDuration : pubSilenceDuration ,
322- speechDuration : pubSpeechDuration ,
323- probability : p ,
324- inferenceDuration,
325- frames : [ copySpeechBuffer ( ) ] ,
326- speaking : pubSpeaking ,
327- rawAccumulatedSilence : 0 ,
328- rawAccumulatedSpeech : 0 ,
329- } ) ;
321+ if (
322+ ! this . sendVADEvent ( {
323+ type : VADEventType . START_OF_SPEECH ,
324+ samplesIndex : pubCurrentSample ,
325+ timestamp : pubTimestamp ,
326+ silenceDuration : pubSilenceDuration ,
327+ speechDuration : pubSpeechDuration ,
328+ probability : p ,
329+ inferenceDuration,
330+ frames : [ copySpeechBuffer ( ) ] ,
331+ speaking : pubSpeaking ,
332+ rawAccumulatedSilence : 0 ,
333+ rawAccumulatedSpeech : 0 ,
334+ } )
335+ ) {
336+ continue ;
337+ }
330338 }
331339 } else {
332340 silenceThresholdDuration += windowDuration ;
@@ -341,19 +349,23 @@ export class VADStream extends baseStream {
341349 pubSpeechDuration = 0 ;
342350 pubSilenceDuration = silenceThresholdDuration ;
343351
344- this . outputWriter . write ( {
345- type : VADEventType . END_OF_SPEECH ,
346- samplesIndex : pubCurrentSample ,
347- timestamp : pubTimestamp ,
348- silenceDuration : pubSilenceDuration ,
349- speechDuration : pubSpeechDuration ,
350- probability : p ,
351- inferenceDuration,
352- frames : [ copySpeechBuffer ( ) ] ,
353- speaking : pubSpeaking ,
354- rawAccumulatedSilence : 0 ,
355- rawAccumulatedSpeech : 0 ,
356- } ) ;
352+ if (
353+ ! this . sendVADEvent ( {
354+ type : VADEventType . END_OF_SPEECH ,
355+ samplesIndex : pubCurrentSample ,
356+ timestamp : pubTimestamp ,
357+ silenceDuration : pubSilenceDuration ,
358+ speechDuration : pubSpeechDuration ,
359+ probability : p ,
360+ inferenceDuration,
361+ frames : [ copySpeechBuffer ( ) ] ,
362+ speaking : pubSpeaking ,
363+ rawAccumulatedSilence : 0 ,
364+ rawAccumulatedSpeech : 0 ,
365+ } )
366+ ) {
367+ continue ;
368+ }
357369
358370 resetWriteCursor ( ) ;
359371 }
0 commit comments