1+ /*
2+ Moved to https://github.com/lkuza2/java-speech-api/pull/99
3+
14package org.openasr.idear.recognizer.vad
25
36import edu.cmu.sphinx.frontend.DataProcessingException
47import edu.cmu.sphinx.frontend.util.DataUtil
58import org.openasr.idear.recognizer.CustomMicrophone
69import java.io.DataInputStream
7- import java.io.InputStream
810import java.util.logging.Logger
911import javax.sound.sampled.AudioInputStream
1012import javax.sound.sampled.AudioSystem
11- import org.apache.commons.lang3.Conversion.byteArrayToShort
1213import java.io.IOException
1314
14-
15+ */
1516/* *
17+ * Adapted from
18+ * https://github.com/amaurycrickx/recognito/blob/master/recognito/src/main/java/com/bitsinharmony/recognito/vad/AutocorrellatedVoiceActivityDetector.java
19+ *
1620 * A voice activity detector attempts to detect presence or absence of voice in the signal.
1721 * <p>
1822 * The technique used here is a simple (but efficient) one based on a characteristic of (white) noise :
@@ -27,17 +31,21 @@ import java.io.IOException
2731 *
2832 * TODO: incorporate https://www.researchgate.net/publication/255667085_A_simple_but_efficient_real-time_voice_activity_detection_algorithm
2933 * @author Amaury Crickx
30- */
34+ */ /*
3135class AutocorrellatedVoiceActivityDetector { //(val sampleRate: Int = 16_000) {
3236 private val WINDOW_MILLIS = 1
3337 private val FADE_MILLIS = 2
3438 private val MIN_SILENCE_MILLIS = 4
3539 private val MIN_VOICE_MILLIS = 200
3640 private val MAX_VOICE_MILLIS = 60_000
3741
38- /* * the noise threshold used to determine if a given section is silence or not */
42+ */
43+ /* * the noise threshold used to determine if a given section is silence or not */ /*
44+
3945 var threshold = 0.0001
4046
47+ private var bytesPerValue: Int = 0
48+ private var totalValuesRead: Int = 0
4149 private var fadeInFactors: DoubleArray? = null
4250 private var fadeOutFactors: DoubleArray? = null
4351
@@ -52,7 +60,7 @@ class AutocorrellatedVoiceActivityDetector { //(val sampleRate: Int = 16_000) {
5260
5361 val windowSize = WINDOW_MILLIS * oneMilliInSamples
5462 val correllation = DoubleArray(windowSize)
55- val window = DoubleArray (windowSize)
63+ var window: DoubleArray // = DoubleArray(windowSize)
5664 var position: Int
5765 var activityStart: Int
5866 var data = DataInputStream(inStream)
@@ -61,7 +69,7 @@ class AutocorrellatedVoiceActivityDetector { //(val sampleRate: Int = 16_000) {
6169 val thread = Thread({
6270 try {
6371 while (true) {
64- read
72+ window = readFrame(inStream)
6573
6674
6775 val mean = bruteForceAutocorrelation(window, correllation)
@@ -87,6 +95,7 @@ class AutocorrellatedVoiceActivityDetector { //(val sampleRate: Int = 16_000) {
8795 return thread
8896 }
8997
98+ // https://github.com/tilo/cmusphinx-1/blob/master/sphinx4/src/sphinx4/edu/cmu/sphinx/frontend/util/AudioFileDataSource.java
9099 fun readFrame(dataStream: AudioInputStream): DoubleArray {
91100 // read one frame's worth of bytes
92101 val bigEndian = dataStream.format.isBigEndian
@@ -103,7 +112,7 @@ class AutocorrellatedVoiceActivityDetector { //(val sampleRate: Int = 16_000) {
103112 }
104113 } while (read != -1 && totalRead < bytesToRead)
105114 if (totalRead <= 0) {
106- closeDataStream()
115+ // closeDataStream()
107116 return null
108117 }
109118 // shrink incomplete frames
@@ -116,7 +125,7 @@ class AutocorrellatedVoiceActivityDetector { //(val sampleRate: Int = 16_000) {
116125 val shrinkedBuffer = ByteArray(totalRead)
117126 System.arraycopy(samplesBuffer, 0, shrinkedBuffer, 0, totalRead)
118127 samplesBuffer = shrinkedBuffer
119- closeDataStream()
128+ // closeDataStream()
120129 }
121130 } catch (ioe: IOException) {
122131 throw DataProcessingException("Error reading data", ioe)
@@ -133,12 +142,14 @@ class AutocorrellatedVoiceActivityDetector { //(val sampleRate: Int = 16_000) {
133142 return doubleData
134143 }
135144
136- /* *
145+ */
146+ /* *
137147 * Removes silence out of the given voice sample
138148 * @param voiceSample the voice sample
139149 * *
140150 * @return a new voice sample with silence removed
141- */
151+ */ /*
152+
142153 fun removeSilence(voiceSample: DoubleArray, sampleRate: Int = 16_000): DoubleArray {
143154 val oneMilliInSamples = sampleRate / 1000
144155
@@ -198,12 +209,14 @@ class AutocorrellatedVoiceActivityDetector { //(val sampleRate: Int = 16_000) {
198209 }
199210 }
200211
201- /* *
212+ */
213+ /* *
202214 * Gets the minimum voice activity length that will be considered by the removeSilence method
203215 * @param sampleRate the sample rate
204216 * *
205217 * @return the length
206- */
218+ */ /*
219+
207220 fun getMinimumVoiceActivityLength(sampleRate: Int): Int {
208221 return MIN_VOICE_MILLIS * sampleRate / 1000
209222 }
@@ -212,7 +225,8 @@ class AutocorrellatedVoiceActivityDetector { //(val sampleRate: Int = 16_000) {
212225 return MAX_VOICE_MILLIS * sampleRate / 1000
213226 }
214227
215- /* *
228+ */
229+ /* *
216230 * Applies a linear fade in / out to the given portion of audio (removes unwanted cracks)
217231 * @param voiceSample the voice sample
218232 * *
@@ -221,7 +235,8 @@ class AutocorrellatedVoiceActivityDetector { //(val sampleRate: Int = 16_000) {
221235 * @param startIndex fade in start point
222236 * *
223237 * @param endIndex fade out end point
224- */
238+ */ /*
239+
225240 private fun applyFadeInFadeOut(voiceSample: DoubleArray, fadeLength: Int, startIndex: Int, endIndex: Int) {
226241 val fadeOutStart = endIndex - fadeLength
227242 for (j in 0..fadeLength - 1) {
@@ -230,14 +245,16 @@ class AutocorrellatedVoiceActivityDetector { //(val sampleRate: Int = 16_000) {
230245 }
231246 }
232247
233- /* *
248+ */
249+ /* *
234250 * Merges small active areas
235251 * @param result the voice activity result
236252 * *
237253 * @param minActivityLength the minimum length to apply
238254 * *
239255 * @return a count of silent elements
240- */
256+ */ /*
257+
241258 private fun mergeSmallActiveAreas(result: BooleanArray, minActivityLength: Int): Int {
242259 var active: Boolean
243260 var increment: Int
@@ -262,12 +279,14 @@ class AutocorrellatedVoiceActivityDetector { //(val sampleRate: Int = 16_000) {
262279 return silenceCounter
263280 }
264281
265- /* *
282+ */
283+ /* *
266284 * Merges small silent areas
267285 * @param result the voice activity result
268286 * *
269287 * @param minSilenceLength the minimum silence length to apply
270- */
288+ */ /*
289+
271290 private fun mergeSmallSilentAreas(result: BooleanArray, minSilenceLength: Int) {
272291 var active: Boolean
273292 var increment: Int
@@ -286,10 +305,12 @@ class AutocorrellatedVoiceActivityDetector { //(val sampleRate: Int = 16_000) {
286305 }
287306 }
288307
289- /* *
308+ */
309+ /* *
290310 * Initializes the fade-in / fade-out factor properties
291311 * @param fadeLength
292- */
312+ */ /*
313+
293314 private fun initFadeFactors(fadeLength: Int) {
294315 val fadeInFactors = DoubleArray(fadeLength)
295316 val fadeOutFactors = DoubleArray(fadeLength)
@@ -303,14 +324,16 @@ class AutocorrellatedVoiceActivityDetector { //(val sampleRate: Int = 16_000) {
303324 this.fadeOutFactors = fadeOutFactors
304325 }
305326
306- /* *
327+ */
328+ /* *
307329 * Applies autocorrelation in O(n²) operations. Keep arrays very short!
308330 * @param voiceSample the voice sample buffer
309331 * *
310332 * @param correllation the correlation buffer
311333 * *
312334 * @return the mean correlation value
313- */
335+ */ /*
336+
314337 private fun bruteForceAutocorrelation(voiceSample: DoubleArray, correllation: DoubleArray): Double {
315338 correllation.fill(0.0)
316339 val n = voiceSample.size
@@ -327,3 +350,4 @@ class AutocorrellatedVoiceActivityDetector { //(val sampleRate: Int = 16_000) {
327350 private val logger = Logger.getLogger(CustomMicrophone::class.java.simpleName)
328351 }
329352}
353+ */
0 commit comments