Skip to content

Commit 473f182

Browse files
committed
1 parent 7fabc29 commit 473f182

2 files changed

Lines changed: 48 additions & 24 deletions

File tree

src/main/java/org/openasr/idear/nlp/NlpProvider.kt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,4 +2,4 @@ package org.openasr.idear.nlp
22

33
interface NlpProvider {
44
fun processUtterance(utterance: String, sessionAttributes: Map<String, String>? = null)
5-
}
5+
}

src/main/java/org/openasr/idear/recognizer/vad/AutocorrellatedVoiceActivityDetector.kt

Lines changed: 47 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,22 @@
1+
/*
2+
Moved to https://github.com/lkuza2/java-speech-api/pull/99
3+
14
package org.openasr.idear.recognizer.vad
25
36
import edu.cmu.sphinx.frontend.DataProcessingException
47
import edu.cmu.sphinx.frontend.util.DataUtil
58
import org.openasr.idear.recognizer.CustomMicrophone
69
import java.io.DataInputStream
7-
import java.io.InputStream
810
import java.util.logging.Logger
911
import javax.sound.sampled.AudioInputStream
1012
import javax.sound.sampled.AudioSystem
11-
import org.apache.commons.lang3.Conversion.byteArrayToShort
1213
import java.io.IOException
1314
14-
15+
*/
1516
/**
17+
* Adapted from
18+
* https://github.com/amaurycrickx/recognito/blob/master/recognito/src/main/java/com/bitsinharmony/recognito/vad/AutocorrellatedVoiceActivityDetector.java
19+
*
1620
* A voice activity detector attempts to detect the presence or absence of voice in the signal.
1721
* <p>
1822
* The technique used here is a simple (but efficient) one based on a characteristic of (white) noise :
@@ -27,17 +31,21 @@ import java.io.IOException
2731
*
2832
* TODO: incorporate https://www.researchgate.net/publication/255667085_A_simple_but_efficient_real-time_voice_activity_detection_algorithm
2933
* @author Amaury Crickx
30-
*/
34+
*//*
3135
class AutocorrellatedVoiceActivityDetector { //(val sampleRate: Int = 16_000) {
3236
private val WINDOW_MILLIS = 1
3337
private val FADE_MILLIS = 2
3438
private val MIN_SILENCE_MILLIS = 4
3539
private val MIN_VOICE_MILLIS = 200
3640
private val MAX_VOICE_MILLIS = 60_000
3741
38-
/** the noise threshold used to determine if a given section is silence or not */
42+
*/
43+
/** the noise threshold used to determine if a given section is silence or not *//*
44+
3945
var threshold = 0.0001
4046
47+
private var bytesPerValue: Int = 0
48+
private var totalValuesRead: Int = 0
4149
private var fadeInFactors: DoubleArray? = null
4250
private var fadeOutFactors: DoubleArray? = null
4351
@@ -52,7 +60,7 @@ class AutocorrellatedVoiceActivityDetector { //(val sampleRate: Int = 16_000) {
5260
5361
val windowSize = WINDOW_MILLIS * oneMilliInSamples
5462
val correllation = DoubleArray(windowSize)
55-
val window = DoubleArray(windowSize)
63+
var window: DoubleArray // = DoubleArray(windowSize)
5664
var position: Int
5765
var activityStart: Int
5866
var data = DataInputStream(inStream)
@@ -61,7 +69,7 @@ class AutocorrellatedVoiceActivityDetector { //(val sampleRate: Int = 16_000) {
6169
val thread = Thread({
6270
try {
6371
while (true) {
64-
read
72+
window = readFrame(inStream)
6573
6674
6775
val mean = bruteForceAutocorrelation(window, correllation)
@@ -87,6 +95,7 @@ class AutocorrellatedVoiceActivityDetector { //(val sampleRate: Int = 16_000) {
8795
return thread
8896
}
8997
98+
// https://github.com/tilo/cmusphinx-1/blob/master/sphinx4/src/sphinx4/edu/cmu/sphinx/frontend/util/AudioFileDataSource.java
9099
fun readFrame(dataStream: AudioInputStream): DoubleArray {
91100
// read one frame's worth of bytes
92101
val bigEndian = dataStream.format.isBigEndian
@@ -103,7 +112,7 @@ class AutocorrellatedVoiceActivityDetector { //(val sampleRate: Int = 16_000) {
103112
}
104113
} while (read != -1 && totalRead < bytesToRead)
105114
if (totalRead <= 0) {
106-
closeDataStream()
115+
// closeDataStream()
107116
return null
108117
}
109118
// shrink incomplete frames
@@ -116,7 +125,7 @@ class AutocorrellatedVoiceActivityDetector { //(val sampleRate: Int = 16_000) {
116125
val shrinkedBuffer = ByteArray(totalRead)
117126
System.arraycopy(samplesBuffer, 0, shrinkedBuffer, 0, totalRead)
118127
samplesBuffer = shrinkedBuffer
119-
closeDataStream()
128+
// closeDataStream()
120129
}
121130
} catch (ioe: IOException) {
122131
throw DataProcessingException("Error reading data", ioe)
@@ -133,12 +142,14 @@ class AutocorrellatedVoiceActivityDetector { //(val sampleRate: Int = 16_000) {
133142
return doubleData
134143
}
135144
136-
/**
145+
*/
146+
/**
137147
* Removes silence out of the given voice sample
138148
* @param voiceSample the voice sample
139149
* *
140150
* @return a new voice sample with silence removed
141-
*/
151+
*//*
152+
142153
fun removeSilence(voiceSample: DoubleArray, sampleRate: Int = 16_000): DoubleArray {
143154
val oneMilliInSamples = sampleRate / 1000
144155
@@ -198,12 +209,14 @@ class AutocorrellatedVoiceActivityDetector { //(val sampleRate: Int = 16_000) {
198209
}
199210
}
200211
201-
/**
212+
*/
213+
/**
202214
* Gets the minimum voice activity length that will be considered by the remove silence method
203215
* @param sampleRate the sample rate
204216
* *
205217
* @return the length
206-
*/
218+
*//*
219+
207220
fun getMinimumVoiceActivityLength(sampleRate: Int): Int {
208221
return MIN_VOICE_MILLIS * sampleRate / 1000
209222
}
@@ -212,7 +225,8 @@ class AutocorrellatedVoiceActivityDetector { //(val sampleRate: Int = 16_000) {
212225
return MAX_VOICE_MILLIS * sampleRate / 1000
213226
}
214227
215-
/**
228+
*/
229+
/**
216230
* Applies a linear fade in / out to the given portion of audio (removes unwanted cracks)
217231
* @param voiceSample the voice sample
218232
* *
@@ -221,7 +235,8 @@ class AutocorrellatedVoiceActivityDetector { //(val sampleRate: Int = 16_000) {
221235
* @param startIndex fade in start point
222236
* *
223237
* @param endIndex fade out end point
224-
*/
238+
*//*
239+
225240
private fun applyFadeInFadeOut(voiceSample: DoubleArray, fadeLength: Int, startIndex: Int, endIndex: Int) {
226241
val fadeOutStart = endIndex - fadeLength
227242
for (j in 0..fadeLength - 1) {
@@ -230,14 +245,16 @@ class AutocorrellatedVoiceActivityDetector { //(val sampleRate: Int = 16_000) {
230245
}
231246
}
232247
233-
/**
248+
*/
249+
/**
234250
* Merges small active areas
235251
* @param result the voice activity result
236252
* *
237253
* @param minActivityLength the minimum length to apply
238254
* *
239255
* @return a count of silent elements
240-
*/
256+
*//*
257+
241258
private fun mergeSmallActiveAreas(result: BooleanArray, minActivityLength: Int): Int {
242259
var active: Boolean
243260
var increment: Int
@@ -262,12 +279,14 @@ class AutocorrellatedVoiceActivityDetector { //(val sampleRate: Int = 16_000) {
262279
return silenceCounter
263280
}
264281
265-
/**
282+
*/
283+
/**
266284
* Merges small silent areas
267285
* @param result the voice activity result
268286
* *
269287
* @param minSilenceLength the minimum silence length to apply
270-
*/
288+
*//*
289+
271290
private fun mergeSmallSilentAreas(result: BooleanArray, minSilenceLength: Int) {
272291
var active: Boolean
273292
var increment: Int
@@ -286,10 +305,12 @@ class AutocorrellatedVoiceActivityDetector { //(val sampleRate: Int = 16_000) {
286305
}
287306
}
288307
289-
/**
308+
*/
309+
/**
290310
* Initializes the fade-in / fade-out factor properties
291311
* @param fadeLength
292-
*/
312+
*//*
313+
293314
private fun initFadeFactors(fadeLength: Int) {
294315
val fadeInFactors = DoubleArray(fadeLength)
295316
val fadeOutFactors = DoubleArray(fadeLength)
@@ -303,14 +324,16 @@ class AutocorrellatedVoiceActivityDetector { //(val sampleRate: Int = 16_000) {
303324
this.fadeOutFactors = fadeOutFactors
304325
}
305326
306-
/**
327+
*/
328+
/**
307329
* Applies autocorrelation in O(n²) operations. Keep arrays very short!
308330
* @param voiceSample the voice sample buffer
309331
* *
310332
* @param correllation the correlation buffer
311333
* *
312334
* @return the mean correlation value
313-
*/
335+
*//*
336+
314337
private fun bruteForceAutocorrelation(voiceSample: DoubleArray, correllation: DoubleArray): Double {
315338
correllation.fill(0.0)
316339
val n = voiceSample.size
@@ -327,3 +350,4 @@ class AutocorrellatedVoiceActivityDetector { //(val sampleRate: Int = 16_000) {
327350
private val logger = Logger.getLogger(CustomMicrophone::class.java.simpleName)
328351
}
329352
}
353+
*/

0 commit comments

Comments
 (0)