java-speech-api/src/com/darkprograms/speech/microphone/MicrophoneAnalyzer.java at 5be2e149d16f1851176effde4520e70289d41ccd · lkuza2/java-speech-api · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
package com.darkprograms.speech.microphone;

import javax.sound.sampled.AudioFileFormat;
import com.darkprograms.speech.util.*;

/********************************************************************************************
 * Microphone Analyzer class. Extends the Microphone class with volume and pitch
 * (fundamental frequency) detection.
 * Implemented as a precursor to a Voice Activity Detection (VAD) algorithm.
 * It can currently be used for audio data analysis.
 * Dependencies: FFT.java & Complex.java, both found in the utility package.
 * @author Aaron Gokaslan
 ********************************************************************************************/

public class MicrophoneAnalyzer extends Microphone {

	/** Interval, in milliseconds, used by {@link #getAudioVolume()}. */
	private static final int DEFAULT_VOLUME_INTERVAL_MS = 100;

	/** Number of bytes read by {@link #getFrequency()}. */
	private static final int DEFAULT_FREQUENCY_BYTES = 4096;

	/** Number of spectra multiplied together by the Harmonic Product Spectrum step. */
	private static final int HARMONIC_COUNT = 4;

	/** Value returned by {@link #getFrequency()} when the analysis throws. */
	private static final int FREQUENCY_FAILURE = -666;

	/** Arbitrary gain applied to decoded samples before the FFT. */
	private static final double AMPLIFICATION = 100.0;

	/**
	 * Constructor
	 * @param fileType The file type you want to save in. FLAC recommended.
	 * @throws MicrophoneException Is thrown if there was an error initializing the microphone
	 */
	public MicrophoneAnalyzer(AudioFileFormat.Type fileType) throws MicrophoneException{
		super(fileType);
	}

	/**
	 * Gets the volume of the microphone input over a 100ms interval.
	 * Allow 100ms for this method to run in your code or use
	 * {@link #getAudioVolume(int)} with a smaller interval.
	 * @return The volume of the microphone input or -1 if data-line is not available
	 */
	public int getAudioVolume(){
		return getAudioVolume(DEFAULT_VOLUME_INTERVAL_MS);
	}

	/**
	 * Gets the volume of the microphone input
	 * @param interval The length of time you would like to calculate the volume over in milliseconds.
	 * @return The volume of the microphone input or -1 if data-line is not available.
	 */
	public int getAudioVolume(int interval){
		return calculateAudioVolume(this.getNumOfBytes(interval/1000d));
	}

	/**
	 * Reads the requested number of bytes from the data-line and computes their volume.
	 * @param numOfBytes The number of bytes you want for volume interpretation
	 * @return The volume over the specified number of bytes or -1 if data-line is unavailable.
	 */
	private int calculateAudioVolume(int numOfBytes){
		byte[] data = getBytes(numOfBytes);
		if(data == null){
			return -1;
		}
		return calculateRMSLevel(data);
	}

	/**
	 * Calculates the volume of AudioData which may be buffered data from a data-line.
	 * The result is the standard deviation of the individual byte values
	 * (root mean square of the deviations from their mean), rounded to the nearest integer.
	 * @param audioData The byte[] you want to determine the volume of
	 * @return the calculated volume of audioData, or 0 if audioData is empty
	 * @throws NullPointerException if audioData is null
	 */
	public static int calculateRMSLevel(byte[] audioData){
		if(audioData.length == 0){
			return 0;//No data means no signal; also avoids dividing by zero.
		}

		long sum = 0;
		for(byte value : audioData){
			sum += value;
		}
		//Floating-point division: integer division would truncate the mean
		//and bias every deviation computed below.
		double mean = (double)sum / audioData.length;

		double squaredDeviationSum = 0d;
		for(byte value : audioData){
			double deviation = value - mean;
			squaredDeviationSum += deviation * deviation;
		}

		double meanSquare = squaredDeviationSum / audioData.length;
		return (int)(Math.sqrt(meanSquare) + 0.5);
	}

	/**
	 * Returns the number of bytes over interval for useful when figuring out how long to record.
	 * @param seconds The length in seconds
	 * @return the number of bytes the microphone will save.
	 */
	public int getNumOfBytes(int seconds){
		return getNumOfBytes((double)seconds);
	}

	/**
	 * Returns the number of bytes over interval for useful when figuring out how long to record.
	 * Computed as seconds * sample rate * frame size, rounded to the nearest integer.
	 * @param seconds The length in seconds
	 * @return the number of bytes the microphone will output over the specified time.
	 */
	public int getNumOfBytes(double seconds){
		return (int)(seconds*getAudioFormat().getSampleRate()*getAudioFormat().getFrameSize()+.5);
	}

	/**
	 * Returns a byte[] containing the specified number of bytes read from the data-line.
	 * The count returned by the read call is not inspected, so any bytes
	 * that were not read are left as zero.
	 * @param numOfBytes The length of the returned array.
	 * @return The specified array or null if the data-line is unavailable.
	 */
	private byte[] getBytes(int numOfBytes){
		if(getTargetDataLine() == null){
			return null;//If data cannot be read, returns a null array.
		}
		byte[] data = new byte[numOfBytes];
		this.getTargetDataLine().read(data, 0, numOfBytes);
		return data;
	}


	/**
	 * Calculates the fundamental frequency. In other words, it calculates pitch,
	 * except pitch is far more subjective and subtle. Also note, that readings may occasionally,
	 * be in error due to the complex nature of sound. This feature is in Beta
	 * @return The frequency of the sound in Hertz, -1 if the data-line is unavailable,
	 * or -666 if the analysis failed with an exception.
	 */
	public int getFrequency(){
		try {
			return getFrequency(DEFAULT_FREQUENCY_BYTES);
		} catch (Exception e) {
			//Not expected with the default buffer size; the sentinel is kept for existing callers.
			return FREQUENCY_FAILURE;
		}
	}

	/**
	 * Calculates the frequency based off of the number of bytes.
	 * The bytes are decoded into numOfBytes / (bytes per sample) samples which are fed to the FFT.
	 * NOTE(review): the FFT utility presumably requires the sample count to be a power of 2 -
	 * confirm against FFT.java.
	 * @param numOfBytes The number of bytes to read from the data-line.
	 * @return The calculated frequency in Hertz or -1 if the data-line is unavailable.
	 * @throws Exception declared for backward compatibility; no checked exception is raised here.
	 */
	public int getFrequency(int numOfBytes) throws Exception{
		byte[] data = getBytes(numOfBytes);
		if(data == null){
			return -1;
		}
		return getFrequency(data);
	}

	/**
	 * Calculates the frequency based off of the byte array,
	 * Pipeline: decode bytes to samples, apply a Hanning window, run the FFT,
	 * then locate the fundamental with a Harmonic Product Spectrum.
	 * @param bytes The audioData you want to analyze
	 * @return The calculated frequency in Hertz, or -1 if there is too little data to analyze.
	 */
	public int getFrequency(byte[] bytes){
		double[] samples = applyHanningWindow(this.bytesToDoubleArray(bytes));
		Complex[] signal = new Complex[samples.length];
		for(int i = 0; i<signal.length; i++){
			signal[i] = new Complex(samples[i], 0);
		}
		return this.calculateFundamentalFrequency(FFT.fft(signal), HARMONIC_COUNT);
	}

	/**
	 * Applies a Hanning Window to the whole data set.
	 * Hanning Windows are used to increase the accuracy of the FFT.
	 * One should always apply a window to a dataset before applying an FFT
	 * @param data The data you want to apply the window to (modified in place)
	 * @return The windowed data set (the same array instance)
	 */
	private double[] applyHanningWindow(double[] data){
		return applyHanningWindow(data, 0, data.length);
	}

	/**
	 * Applies a Hanning Window to a section of the data set.
	 * Hanning Windows are used to increase the accuracy of the FFT.
	 * One should always apply a window to a dataset before applying an FFT
	 * @param signal_in The data you want to apply the window to (modified in place)
	 * @param pos The starting index you want to apply a window from
	 * @param size The size of the window
	 * @return The windowed data set (the same array instance)
	 */
	private double[] applyHanningWindow(double[] signal_in, int pos, int size){
		final int end = pos + size;
		for(int i = pos; i < end; i++){
			int j = i - pos; // j = index into Hann window function
			signal_in[i] = signal_in[i] * 0.5 * (1.0 - Math.cos(2.0 * Math.PI * j / size));
		}
		return signal_in;
	}


	/**
	 * This method calculates the fundamental frequency using Harmonic Product Spectrum
	 * It down samples the FFTData N times and multiplies the arrays
	 * together to determine the fundamental frequency. This is slightly more computationally
	 * expensive, but much more accurate. In simpler terms, the function will remove the harmonic frequencies
	 * which occur at every N value by finding the lowest common divisor among them.
	 * @param fftData The array returned by the FFT
	 * @param N the number of times you wish to downsample.
	 * WARNING: The more times you downsample, the lower the maximum detectable frequency is.
	 * @return The fundamental frequency in Hertz, or -1 if the arguments are invalid
	 * or the spectrum is too short to analyze.
	 */
	private int calculateFundamentalFrequency(Complex[] fftData, int N){
		if(N<=0 || fftData == null){ return -1; } //error case

		final int fftLength = fftData.length;//Used to calculate bin size
		Complex[] spectrum = removeNegativeFrequencies(fftData);

		final int productLength = spectrum.length/N;
		if(productLength == 0){ return -1; } //Not enough bins to combine N harmonics

		Complex[] product = new Complex[productLength];
		for(int bin = 0; bin<productLength; bin++){
			Complex accumulated = new Complex(1,0);
			//The spectrum downsampled by a factor h has spectrum[bin*h] at position bin.
			for(int harmonic = 1; harmonic<=N; harmonic++){
				accumulated = accumulated.times(spectrum[bin*harmonic]);
			}
			product[bin] = accumulated;
		}

		int peak = this.findMaxMagnitude(product);
		if(peak < 0){ return -1; } //No comparable magnitude found
		//Multiply before rounding: rounding the bin width first would scale
		//the error by the peak index.
		return (int)(peak*getFFTBinSize(fftLength) + .5);
	}

	/**
	 * Keeps only the first half of the transform and discards the second half.
	 * @param c The data you want to remove the second half from
	 * @return A new array holding the first c.length/2 elements of c
	 */
	private Complex[] removeNegativeFrequencies(Complex[] c){
		Complex[] out = new Complex[c.length/2];
		System.arraycopy(c, 0, out, 0, out.length);
		return out;
	}

	/**
	 * Calculates the FFTbin size based off the length of the the array
	 * Each FFTBin size represents the range of frequencies treated as one.
	 * For example, if the bin size is 5 then the algorithm is precise to within 5hz.
	 * Precondition: length cannot be 0.
	 * @param fftDataLength The length of the array used to feed the FFT algorithm
	 * @return FFTBin size in Hertz (sample rate divided by fftDataLength), not rounded
	 */
	private double getFFTBinSize(int fftDataLength){
		return getAudioFormat().getSampleRate()/(double)fftDataLength;
	}

	/**
	 * Calculates index of the maximum magnitude in a complex array.
	 * @param input The Complex[] you want to get max magnitude from.
	 * @return The index of the first element with the max magnitude, or -1 if input is empty
	 */
	private int findMaxMagnitude(Complex[] input){
		//Start below every possible magnitude. Double.MIN_VALUE is the smallest
		//POSITIVE double, so it would reject an all-zero spectrum.
		double max = Double.NEGATIVE_INFINITY;
		int index = -1;
		for(int i = 0; i<input.length; i++){
			double magnitude = input[i].getMagnitude();
			if(magnitude>max){
				max = magnitude;
				index = i;
			}
		}
		return index;
	}

	/**
	 * Converts bytes from a TargetDataLine into a double[] allowing the information to be read.
	 * Each sample is assembled with its first byte as the least significant one; for multi-byte
	 * samples the last byte keeps its sign, while single-byte samples are read as unsigned.
	 * Every sample is divided by 32768 and multiplied by a fixed gain.
	 * The result holds one entry per complete sample; trailing bytes that do not form a
	 * whole sample are ignored.
	 * @param bufferData The buffer read in from the target data line
	 * @return The double[] that the buffer has been converted into.
	 * @throws IllegalStateException if the audio format's sample size is smaller than 8 bits
	 */
	private double[] bytesToDoubleArray(byte[] bufferData){
		final int sampleSizeInBits = getAudioFormat().getSampleSizeInBits();
		final int bytesPerSample = sampleSizeInBits/8;
		if(bytesPerSample <= 0){
			//A zero stride would never advance through the buffer.
			throw new IllegalStateException("Unsupported sample size: " + sampleSizeInBits + " bits");
		}

		final int sampleCount = bufferData.length/bytesPerSample;
		double[] micBufferData = new double[sampleCount];
		for(int s = 0; s<sampleCount; s++){
			final int offset = s*bytesPerSample;
			double sample = 0;
			for(int b = 0; b<bytesPerSample; b++){
				int v = bufferData[offset + b];
				if(b < bytesPerSample - 1 || bytesPerSample == 1){
					v &= 0xFF;//Strip sign extension from every byte except a multi-byte sample's last one
				}
				sample += v << (b * 8);
			}
			micBufferData[s] = AMPLIFICATION * (sample / 32768.0);
		}
		return micBufferData;
	}

}