Skip to content

Commit d296dc6

Browse files
committed
Added experimental RecognizerChunked class that utilizes the new V2
chunked response endpoint.
1 parent 18db2fa commit d296dc6

1 file changed

Lines changed: 296 additions & 0 deletions

File tree

Lines changed: 296 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,296 @@
1+
package com.darkprograms.speech.recognizer;
2+
3+
import java.io.BufferedReader;
4+
import java.io.File;
5+
import java.io.FileInputStream;
6+
import java.io.IOException;
7+
import java.io.InputStreamReader;
8+
import java.io.OutputStream;
9+
import java.net.HttpURLConnection;
10+
import java.net.MalformedURLException;
11+
import java.net.URL;
12+
import java.net.URLConnection;
13+
import java.nio.ByteBuffer;
14+
import java.nio.MappedByteBuffer;
15+
import java.nio.channels.FileChannel;
16+
import java.util.ArrayList;
17+
import java.util.List;
18+
19+
import javax.net.ssl.HttpsURLConnection;
20+
import javax.xml.ws.http.HTTPException;
21+
22+
/**
23+
* This class uses Google's V2 Hook. The class is returns a chunked respones so listeners must be used.
24+
* The class also requires an API-Key (see Constructor) for details. This class is experimental and
25+
* subject to change as we restructure the API.
26+
* @author Aaron Gokaslan (Skylion)
27+
*/
28+
public class RecognizerChunked {
29+
30+
/**
31+
* Google's API V2 URL
32+
*/
33+
private static final String GOOGLE_SPEECH_URL_V2 = "https://www.google.com/speech-api/v2/recognize";
34+
35+
/**
36+
* API-Key used for requests
37+
*/
38+
private final String API_KEY;
39+
40+
/**
41+
* The language code Google uses to determine the language
42+
* Default value is "auto"
43+
*/
44+
private String language;
45+
46+
/**
47+
* Stores the Response Listeners
48+
*/
49+
private List<GSpeechResponseListener> responseListeners = new ArrayList<GSpeechResponseListener>();
50+
51+
/**
52+
* Constructor
53+
* @param API_KEY The API-Key for Google's Speech API. An API key can be obtained by requesting
54+
* one by following the process shown at this
55+
* <a href="http://www.chromium.org/developers/how-tos/api-keys">url</a>.
56+
*/
57+
public RecognizerChunked(String API_KEY){
58+
this.API_KEY = API_KEY;
59+
this.language = "auto";
60+
}
61+
62+
/**
63+
* Constructor
64+
* @param API_KEY The API-Key for Google's Speech API. An API key can be obtained by requesting
65+
* one by following the process shown at this
66+
* <a href="http://www.chromium.org/developers/how-tos/api-keys">url</a>.
67+
* @param language The language you want to use (Iso code)
68+
* Note: This function will most likely be deprecated.
69+
*/
70+
public RecognizerChunked(String API_KEY, String language){
71+
this(API_KEY);
72+
this.language = language;
73+
}
74+
75+
/**
76+
* The current language the Recognizer is set to use. Returns the ISO-Code otherwise,
77+
* it may return "auto."
78+
* @return The ISO-Code or auto if the language the is not specified.
79+
*/
80+
public String getLanguage(){
81+
return language;
82+
}
83+
84+
/**
85+
* Sets the language that the file should return.
86+
* @param language The language as an ISO-Code
87+
*/
88+
public void setLanguage(String language){
89+
this.language = language;
90+
}
91+
92+
/**
93+
* Analyzes the file for speech
94+
* @param infile The file you want to analyze for speech.
95+
* @param sampleRate The sample rate of the audioFile.
96+
* @throws IOException if something goes wrong reading the file.
97+
*/
98+
public void getRecognizedDataForFlac(File infile, int sampleRate) throws IOException{
99+
byte[] data = mapFileIn(infile);
100+
getRecognizedDataForFlac(data, sampleRate);
101+
}
102+
103+
/**
104+
* Analyzes the file for speech
105+
* @param infile The file you want to analyze for speech.
106+
* @param sampleRate The sample rate of the audioFile.
107+
* @throws IOException if something goes wrong reading the file.
108+
*/
109+
public void getRecognizedDataForFlac(String inFile, int sampleRate) throws IOException{
110+
getRecognizedDataForFlac(new File(inFile), sampleRate);
111+
}
112+
113+
/**
114+
* Recognizes the byte data.
115+
* @param data
116+
* @param sampleRate
117+
*/
118+
public void getRecognizedDataForFlac(byte[] data, int sampleRate){
119+
StringBuilder sb = new StringBuilder(GOOGLE_SPEECH_URL_V2);
120+
sb.append("?output=json");
121+
sb.append("&client=chromium");
122+
sb.append("&lang=" + language);
123+
sb.append("&key=" + API_KEY);
124+
String url = sb.toString();
125+
126+
openHttpsPostConnection(url, data, sampleRate);
127+
}
128+
129+
/**
130+
* Opens a chunked response HTTPS line to the specified URL
131+
* @param urlStr The URL string to connect for chunking
132+
* @param data The data you want to send to Google. Speech files under 15 seconds long recommended.
133+
* @param sampleRate The sample rate for your audio file.
134+
*/
135+
private void openHttpsPostConnection(final String urlStr, final byte[] data, final int sampleRate) {
136+
new Thread () {
137+
public void run() {
138+
HttpsURLConnection httpConn = null;
139+
ByteBuffer buff = ByteBuffer.wrap(data);
140+
byte[] destdata = new byte[2048];
141+
int resCode = -1;
142+
OutputStream out = null;
143+
try {
144+
URL url = new URL(urlStr);
145+
URLConnection urlConn = url.openConnection();
146+
if (!(urlConn instanceof HttpsURLConnection)) {
147+
throw new IOException ("URL must be HTTPS");
148+
}
149+
httpConn = (HttpsURLConnection)urlConn;
150+
httpConn.setAllowUserInteraction(false);
151+
httpConn.setInstanceFollowRedirects(true);
152+
httpConn.setRequestMethod("POST");
153+
httpConn.setDoOutput(true);
154+
httpConn.setChunkedStreamingMode(0); //TransferType: chunked
155+
httpConn.setRequestProperty("Content-Type", "audio/x-flac; rate=" + sampleRate);
156+
// this opens a connection, then sends POST & headers.
157+
out = httpConn.getOutputStream();
158+
//beyond 15 sec duration just simply writing the file
159+
// does not seem to work. So buffer it and delay to simulate
160+
// bufferd microphone delivering stream of speech
161+
// re: net.http.ChunkedOutputStream.java
162+
while(buff.remaining() >= destdata.length){
163+
buff.get(destdata);
164+
out.write(destdata);
165+
};
166+
byte[] lastr = new byte[buff.remaining()];
167+
buff.get(lastr, 0, lastr.length);
168+
out.write(lastr);
169+
out.close();
170+
if(resCode >= HttpURLConnection.HTTP_UNAUTHORIZED){//Stops here if Google doesn't like us/
171+
throw new HTTPException(HttpURLConnection.HTTP_UNAUTHORIZED);//Throws
172+
}
173+
String line;//Each line that is read back from Google.
174+
BufferedReader br = new BufferedReader(new InputStreamReader(httpConn.getInputStream()));
175+
while ((line = br.readLine( )) != null) {
176+
if(line.length()>19 && resCode > 100 && resCode < HttpURLConnection.HTTP_UNAUTHORIZED){
177+
GoogleResponse gr = new GoogleResponse();
178+
parseResponse(line, gr);
179+
fireResponseEvent(gr);
180+
}
181+
}
182+
} catch (MalformedURLException e) {
183+
e.printStackTrace();
184+
} catch (IOException e) {
185+
e.printStackTrace();
186+
}
187+
finally {httpConn.disconnect();}
188+
}
189+
}.start();
190+
}
191+
192+
/**
193+
* Converts the file into a byte[].
194+
* @param infile The File you want to specify
195+
* @return a byte array
196+
* @throws IOException if something goes wrong reading the file.
197+
*/
198+
private byte[] mapFileIn(File infile) throws IOException{
199+
FileInputStream fis = new FileInputStream(infile);
200+
try{
201+
FileChannel fc = fis.getChannel(); // Get the file's size and then map it into memory
202+
int sz = (int)fc.size();
203+
MappedByteBuffer bb = fc.map(FileChannel.MapMode.READ_ONLY, 0, sz);
204+
byte[] data2 = new byte[bb.remaining()];
205+
bb.get(data2);
206+
return data2;
207+
}
208+
finally{//Ensures resources are closed regardless of whether the action suceeded
209+
fis.close();
210+
}
211+
}
212+
213+
/**
214+
* Parses the response into a Google Response
215+
* @param rawResponse The raw String you want to parse
216+
* @param gr The GoogleResponse you want to parse into ti.
217+
*/
218+
private void parseResponse(String rawResponse, GoogleResponse gr){
219+
if(rawResponse == null || !rawResponse.contains("\"result\"")){ return; }
220+
if(rawResponse.contains("\"confidence\":")){
221+
String confidence = getBetween(rawResponse, "\"confidence\":", "}");
222+
gr.setConfidence(confidence);
223+
}
224+
else{
225+
gr.setConfidence(String.valueOf(1d));
226+
}
227+
String array = getBetween(rawResponse, "[", "]");
228+
if(array.contains("[")){
229+
array = getBetween(array, "[", "]");
230+
}
231+
String[] parts = array.split(",");
232+
gr.setResponse(parseTranscript(parts[0]));
233+
for(int i = 1; i<parts.length; i++){
234+
gr.getOtherPossibleResponses().add(parseTranscript(parts[i]));
235+
}
236+
}
237+
238+
/**
239+
* Cleans up the transcript portion of the String
240+
* @param s The string you want to process.
241+
* @return The reformated string.
242+
*/
243+
private String parseTranscript(String s){
244+
String tmp = s.substring(s.indexOf(":")+1);
245+
if(s.endsWith("}")){
246+
tmp = tmp.substring(0, tmp.length()-1);
247+
}
248+
tmp = stripQuotes(tmp);
249+
return tmp;
250+
}
251+
252+
/**
253+
* Adds responseListener that triggers when a response from Google is recieved
254+
* @param rl The response listener you want to add
255+
*/
256+
public synchronized void addResponseListener(GSpeechResponseListener rl){
257+
responseListeners.add(rl);
258+
}
259+
260+
/**
261+
* Removes the specified response listener
262+
* @param rl The response listener
263+
*/
264+
public synchronized void removeResponseListener(GSpeechResponseListener rl){
265+
responseListeners.remove(rl);
266+
}
267+
268+
/**
269+
* Fires the response listener
270+
* @param gr The GoogleResponse as the event object.
271+
*/
272+
private synchronized void fireResponseEvent(GoogleResponse gr){
273+
for(GSpeechResponseListener gl: responseListeners){
274+
gl.onResponse(gr);
275+
}
276+
}
277+
278+
private String stripQuotes(String s) {
279+
int start = 0;
280+
if( s.startsWith("\"") ) {
281+
start = 1;
282+
}
283+
int end = s.length();
284+
if( s.endsWith("\"") ) {
285+
end = s.length() - 1;
286+
}
287+
return s.substring(start, end);
288+
}
289+
290+
private String getBetween(String s, String part1, String part2){
291+
String tmp = s.substring(s.indexOf(part1) + part1.length() + 1);
292+
tmp = tmp.substring(0, tmp.lastIndexOf(part2));
293+
return tmp;
294+
}
295+
296+
}

0 commit comments

Comments
 (0)