|
| 1 | +package com.darkprograms.speech.recognizer; |
| 2 | + |
| 3 | +import java.io.BufferedReader; |
| 4 | +import java.io.File; |
| 5 | +import java.io.FileInputStream; |
| 6 | +import java.io.IOException; |
| 7 | +import java.io.InputStreamReader; |
| 8 | +import java.io.OutputStream; |
| 9 | +import java.net.HttpURLConnection; |
| 10 | +import java.net.MalformedURLException; |
| 11 | +import java.net.URL; |
| 12 | +import java.net.URLConnection; |
| 13 | +import java.nio.ByteBuffer; |
| 14 | +import java.nio.MappedByteBuffer; |
| 15 | +import java.nio.channels.FileChannel; |
| 16 | +import java.util.ArrayList; |
| 17 | +import java.util.List; |
| 18 | + |
| 19 | +import javax.net.ssl.HttpsURLConnection; |
| 20 | +import javax.xml.ws.http.HTTPException; |
| 21 | + |
| 22 | +/** |
| 23 | + * This class uses Google's V2 Hook. The class is returns a chunked respones so listeners must be used. |
| 24 | + * The class also requires an API-Key (see Constructor) for details. This class is experimental and |
| 25 | + * subject to change as we restructure the API. |
| 26 | + * @author Aaron Gokaslan (Skylion) |
| 27 | + */ |
| 28 | +public class RecognizerChunked { |
| 29 | + |
| 30 | + /** |
| 31 | + * Google's API V2 URL |
| 32 | + */ |
| 33 | + private static final String GOOGLE_SPEECH_URL_V2 = "https://www.google.com/speech-api/v2/recognize"; |
| 34 | + |
| 35 | + /** |
| 36 | + * API-Key used for requests |
| 37 | + */ |
| 38 | + private final String API_KEY; |
| 39 | + |
| 40 | + /** |
| 41 | + * The language code Google uses to determine the language |
| 42 | + * Default value is "auto" |
| 43 | + */ |
| 44 | + private String language; |
| 45 | + |
| 46 | + /** |
| 47 | + * Stores the Response Listeners |
| 48 | + */ |
| 49 | + private List<GSpeechResponseListener> responseListeners = new ArrayList<GSpeechResponseListener>(); |
| 50 | + |
| 51 | + /** |
| 52 | + * Constructor |
| 53 | + * @param API_KEY The API-Key for Google's Speech API. An API key can be obtained by requesting |
| 54 | + * one by following the process shown at this |
| 55 | + * <a href="http://www.chromium.org/developers/how-tos/api-keys">url</a>. |
| 56 | + */ |
| 57 | + public RecognizerChunked(String API_KEY){ |
| 58 | + this.API_KEY = API_KEY; |
| 59 | + this.language = "auto"; |
| 60 | + } |
| 61 | + |
| 62 | + /** |
| 63 | + * Constructor |
| 64 | + * @param API_KEY The API-Key for Google's Speech API. An API key can be obtained by requesting |
| 65 | + * one by following the process shown at this |
| 66 | + * <a href="http://www.chromium.org/developers/how-tos/api-keys">url</a>. |
| 67 | + * @param language The language you want to use (Iso code) |
| 68 | + * Note: This function will most likely be deprecated. |
| 69 | + */ |
| 70 | + public RecognizerChunked(String API_KEY, String language){ |
| 71 | + this(API_KEY); |
| 72 | + this.language = language; |
| 73 | + } |
| 74 | + |
| 75 | + /** |
| 76 | + * The current language the Recognizer is set to use. Returns the ISO-Code otherwise, |
| 77 | + * it may return "auto." |
| 78 | + * @return The ISO-Code or auto if the language the is not specified. |
| 79 | + */ |
| 80 | + public String getLanguage(){ |
| 81 | + return language; |
| 82 | + } |
| 83 | + |
| 84 | + /** |
| 85 | + * Sets the language that the file should return. |
| 86 | + * @param language The language as an ISO-Code |
| 87 | + */ |
| 88 | + public void setLanguage(String language){ |
| 89 | + this.language = language; |
| 90 | + } |
| 91 | + |
| 92 | + /** |
| 93 | + * Analyzes the file for speech |
| 94 | + * @param infile The file you want to analyze for speech. |
| 95 | + * @param sampleRate The sample rate of the audioFile. |
| 96 | + * @throws IOException if something goes wrong reading the file. |
| 97 | + */ |
| 98 | + public void getRecognizedDataForFlac(File infile, int sampleRate) throws IOException{ |
| 99 | + byte[] data = mapFileIn(infile); |
| 100 | + getRecognizedDataForFlac(data, sampleRate); |
| 101 | + } |
| 102 | + |
| 103 | + /** |
| 104 | + * Analyzes the file for speech |
| 105 | + * @param infile The file you want to analyze for speech. |
| 106 | + * @param sampleRate The sample rate of the audioFile. |
| 107 | + * @throws IOException if something goes wrong reading the file. |
| 108 | + */ |
| 109 | + public void getRecognizedDataForFlac(String inFile, int sampleRate) throws IOException{ |
| 110 | + getRecognizedDataForFlac(new File(inFile), sampleRate); |
| 111 | + } |
| 112 | + |
| 113 | + /** |
| 114 | + * Recognizes the byte data. |
| 115 | + * @param data |
| 116 | + * @param sampleRate |
| 117 | + */ |
| 118 | + public void getRecognizedDataForFlac(byte[] data, int sampleRate){ |
| 119 | + StringBuilder sb = new StringBuilder(GOOGLE_SPEECH_URL_V2); |
| 120 | + sb.append("?output=json"); |
| 121 | + sb.append("&client=chromium"); |
| 122 | + sb.append("&lang=" + language); |
| 123 | + sb.append("&key=" + API_KEY); |
| 124 | + String url = sb.toString(); |
| 125 | + |
| 126 | + openHttpsPostConnection(url, data, sampleRate); |
| 127 | + } |
| 128 | + |
| 129 | + /** |
| 130 | + * Opens a chunked response HTTPS line to the specified URL |
| 131 | + * @param urlStr The URL string to connect for chunking |
| 132 | + * @param data The data you want to send to Google. Speech files under 15 seconds long recommended. |
| 133 | + * @param sampleRate The sample rate for your audio file. |
| 134 | + */ |
| 135 | + private void openHttpsPostConnection(final String urlStr, final byte[] data, final int sampleRate) { |
| 136 | + new Thread () { |
| 137 | + public void run() { |
| 138 | + HttpsURLConnection httpConn = null; |
| 139 | + ByteBuffer buff = ByteBuffer.wrap(data); |
| 140 | + byte[] destdata = new byte[2048]; |
| 141 | + int resCode = -1; |
| 142 | + OutputStream out = null; |
| 143 | + try { |
| 144 | + URL url = new URL(urlStr); |
| 145 | + URLConnection urlConn = url.openConnection(); |
| 146 | + if (!(urlConn instanceof HttpsURLConnection)) { |
| 147 | + throw new IOException ("URL must be HTTPS"); |
| 148 | + } |
| 149 | + httpConn = (HttpsURLConnection)urlConn; |
| 150 | + httpConn.setAllowUserInteraction(false); |
| 151 | + httpConn.setInstanceFollowRedirects(true); |
| 152 | + httpConn.setRequestMethod("POST"); |
| 153 | + httpConn.setDoOutput(true); |
| 154 | + httpConn.setChunkedStreamingMode(0); //TransferType: chunked |
| 155 | + httpConn.setRequestProperty("Content-Type", "audio/x-flac; rate=" + sampleRate); |
| 156 | + // this opens a connection, then sends POST & headers. |
| 157 | + out = httpConn.getOutputStream(); |
| 158 | + //beyond 15 sec duration just simply writing the file |
| 159 | + // does not seem to work. So buffer it and delay to simulate |
| 160 | + // bufferd microphone delivering stream of speech |
| 161 | + // re: net.http.ChunkedOutputStream.java |
| 162 | + while(buff.remaining() >= destdata.length){ |
| 163 | + buff.get(destdata); |
| 164 | + out.write(destdata); |
| 165 | + }; |
| 166 | + byte[] lastr = new byte[buff.remaining()]; |
| 167 | + buff.get(lastr, 0, lastr.length); |
| 168 | + out.write(lastr); |
| 169 | + out.close(); |
| 170 | + if(resCode >= HttpURLConnection.HTTP_UNAUTHORIZED){//Stops here if Google doesn't like us/ |
| 171 | + throw new HTTPException(HttpURLConnection.HTTP_UNAUTHORIZED);//Throws |
| 172 | + } |
| 173 | + String line;//Each line that is read back from Google. |
| 174 | + BufferedReader br = new BufferedReader(new InputStreamReader(httpConn.getInputStream())); |
| 175 | + while ((line = br.readLine( )) != null) { |
| 176 | + if(line.length()>19 && resCode > 100 && resCode < HttpURLConnection.HTTP_UNAUTHORIZED){ |
| 177 | + GoogleResponse gr = new GoogleResponse(); |
| 178 | + parseResponse(line, gr); |
| 179 | + fireResponseEvent(gr); |
| 180 | + } |
| 181 | + } |
| 182 | + } catch (MalformedURLException e) { |
| 183 | + e.printStackTrace(); |
| 184 | + } catch (IOException e) { |
| 185 | + e.printStackTrace(); |
| 186 | + } |
| 187 | + finally {httpConn.disconnect();} |
| 188 | + } |
| 189 | + }.start(); |
| 190 | + } |
| 191 | + |
| 192 | + /** |
| 193 | + * Converts the file into a byte[]. |
| 194 | + * @param infile The File you want to specify |
| 195 | + * @return a byte array |
| 196 | + * @throws IOException if something goes wrong reading the file. |
| 197 | + */ |
| 198 | + private byte[] mapFileIn(File infile) throws IOException{ |
| 199 | + FileInputStream fis = new FileInputStream(infile); |
| 200 | + try{ |
| 201 | + FileChannel fc = fis.getChannel(); // Get the file's size and then map it into memory |
| 202 | + int sz = (int)fc.size(); |
| 203 | + MappedByteBuffer bb = fc.map(FileChannel.MapMode.READ_ONLY, 0, sz); |
| 204 | + byte[] data2 = new byte[bb.remaining()]; |
| 205 | + bb.get(data2); |
| 206 | + return data2; |
| 207 | + } |
| 208 | + finally{//Ensures resources are closed regardless of whether the action suceeded |
| 209 | + fis.close(); |
| 210 | + } |
| 211 | + } |
| 212 | + |
| 213 | + /** |
| 214 | + * Parses the response into a Google Response |
| 215 | + * @param rawResponse The raw String you want to parse |
| 216 | + * @param gr The GoogleResponse you want to parse into ti. |
| 217 | + */ |
| 218 | + private void parseResponse(String rawResponse, GoogleResponse gr){ |
| 219 | + if(rawResponse == null || !rawResponse.contains("\"result\"")){ return; } |
| 220 | + if(rawResponse.contains("\"confidence\":")){ |
| 221 | + String confidence = getBetween(rawResponse, "\"confidence\":", "}"); |
| 222 | + gr.setConfidence(confidence); |
| 223 | + } |
| 224 | + else{ |
| 225 | + gr.setConfidence(String.valueOf(1d)); |
| 226 | + } |
| 227 | + String array = getBetween(rawResponse, "[", "]"); |
| 228 | + if(array.contains("[")){ |
| 229 | + array = getBetween(array, "[", "]"); |
| 230 | + } |
| 231 | + String[] parts = array.split(","); |
| 232 | + gr.setResponse(parseTranscript(parts[0])); |
| 233 | + for(int i = 1; i<parts.length; i++){ |
| 234 | + gr.getOtherPossibleResponses().add(parseTranscript(parts[i])); |
| 235 | + } |
| 236 | + } |
| 237 | + |
| 238 | + /** |
| 239 | + * Cleans up the transcript portion of the String |
| 240 | + * @param s The string you want to process. |
| 241 | + * @return The reformated string. |
| 242 | + */ |
| 243 | + private String parseTranscript(String s){ |
| 244 | + String tmp = s.substring(s.indexOf(":")+1); |
| 245 | + if(s.endsWith("}")){ |
| 246 | + tmp = tmp.substring(0, tmp.length()-1); |
| 247 | + } |
| 248 | + tmp = stripQuotes(tmp); |
| 249 | + return tmp; |
| 250 | + } |
| 251 | + |
| 252 | + /** |
| 253 | + * Adds responseListener that triggers when a response from Google is recieved |
| 254 | + * @param rl The response listener you want to add |
| 255 | + */ |
| 256 | + public synchronized void addResponseListener(GSpeechResponseListener rl){ |
| 257 | + responseListeners.add(rl); |
| 258 | + } |
| 259 | + |
| 260 | + /** |
| 261 | + * Removes the specified response listener |
| 262 | + * @param rl The response listener |
| 263 | + */ |
| 264 | + public synchronized void removeResponseListener(GSpeechResponseListener rl){ |
| 265 | + responseListeners.remove(rl); |
| 266 | + } |
| 267 | + |
| 268 | + /** |
| 269 | + * Fires the response listener |
| 270 | + * @param gr The GoogleResponse as the event object. |
| 271 | + */ |
| 272 | + private synchronized void fireResponseEvent(GoogleResponse gr){ |
| 273 | + for(GSpeechResponseListener gl: responseListeners){ |
| 274 | + gl.onResponse(gr); |
| 275 | + } |
| 276 | + } |
| 277 | + |
| 278 | + private String stripQuotes(String s) { |
| 279 | + int start = 0; |
| 280 | + if( s.startsWith("\"") ) { |
| 281 | + start = 1; |
| 282 | + } |
| 283 | + int end = s.length(); |
| 284 | + if( s.endsWith("\"") ) { |
| 285 | + end = s.length() - 1; |
| 286 | + } |
| 287 | + return s.substring(start, end); |
| 288 | + } |
| 289 | + |
| 290 | + private String getBetween(String s, String part1, String part2){ |
| 291 | + String tmp = s.substring(s.indexOf(part1) + part1.length() + 1); |
| 292 | + tmp = tmp.substring(0, tmp.lastIndexOf(part2)); |
| 293 | + return tmp; |
| 294 | + } |
| 295 | + |
| 296 | +} |
0 commit comments