|
| 1 | +// |
| 2 | +// Copyright (c) Microsoft. All rights reserved. |
| 3 | +// Licensed under the MIT license. |
| 4 | +// |
| 5 | +// Microsoft Cognitive Services (formerly Project Oxford): https://www.microsoft.com/cognitive-services |
| 6 | +// |
| 7 | +// Microsoft Cognitive Services (formerly Project Oxford) GitHub: |
| 8 | +// https://github.com/Microsoft/Cognitive-Speech-TTS |
| 9 | +// |
| 10 | +// Copyright (c) Microsoft Corporation |
| 11 | +// All rights reserved. |
| 12 | +// |
| 13 | +// MIT License: |
| 14 | +// Permission is hereby granted, free of charge, to any person obtaining |
| 15 | +// a copy of this software and associated documentation files (the |
| 16 | +// "Software"), to deal in the Software without restriction, including |
| 17 | +// without limitation the rights to use, copy, modify, merge, publish, |
| 18 | +// distribute, sublicense, and/or sell copies of the Software, and to |
| 19 | +// permit persons to whom the Software is furnished to do so, subject to |
| 20 | +// the following conditions: |
| 21 | +// |
| 22 | +// The above copyright notice and this permission notice shall be |
| 23 | +// included in all copies or substantial portions of the Software. |
| 24 | +// |
| 25 | +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
| 26 | +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
| 27 | +// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND |
| 28 | +// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE |
| 29 | +// LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION |
| 30 | +// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION |
| 31 | +// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
| 32 | +// |
| 33 | + |
| 34 | +package com.microsoft.cognitiveservices.pronunciationassessment; |
| 35 | + |
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.util.Base64;

import javax.net.ssl.HttpsURLConnection;
| 44 | + |
/**
 * Sample that streams a raw PCM file to the speech recognition REST endpoint with a
 * {@code Pronunciation-Assessment} header and prints the JSON result and the latency
 * between the end of the upload and the arrival of the response.
 */
public class Sample {

    /**
     * A common wave header with zero audio length (16 kHz sample rate, 16 bits per sample,
     * one channel). The streamed PCM data carries no header, but the API requires one to
     * obtain the audio format, so this header is posted as the first chunk of every query.
     */
    private static final byte[] WAVE_HEADER_16K_16BIT_MONO = new byte[] { 82, 73, 70, 70, 78, (byte)128, 0, 0, 87, 65, 86, 69, 102, 109, 116, 32, 18, 0, 0, 0, 1, 0, 1, 0, (byte)128, 62, 0, 0, 0, 125, 0, 0, 2, 0, 16, 0, 0, 0, 100, 97, 116, 97, 0, 0, 0, 0 };

    /** Bytes of audio per millisecond at the header's byte rate (32000 bytes per second). */
    private static final int AUDIO_BYTES_PER_MILLISECOND = 32;

    /** Size of each audio chunk read from the file and written to the request body. */
    private static final int AUDIO_CHUNK_SIZE = 1024;

    /**
     * Runs the sample: uploads {@code ../../goodmorning.pcm} in chunks and prints the
     * pronunciation assessment result followed by the measured latency.
     *
     * @param args unused
     * @throws Exception if the audio file cannot be read, the request fails, the service
     *                   answers with an HTTP error status, or the thread is interrupted
     *                   while pacing the upload
     */
    public static void main(String[] args) throws Exception {

        String subscriptionKey = "{SubscriptionKey}"; // replace this with your subscription key
        String region = "{Region}"; // replace this with the region corresponding to your subscription key, e.g. westus, eastasia

        // build pronunciation assessment parameters
        String referenceText = "Good morning.";
        String pronAssessmentParams = buildAssessmentHeader(referenceText);

        // build request
        URL url = new URL("https://" + region + ".stt.speech.microsoft.com/speech/recognition/conversation/cognitiveservices/v1?language=en-us");
        HttpsURLConnection connection = (HttpsURLConnection) url.openConnection();
        try {
            connection.setRequestMethod("POST");
            connection.setDoOutput(true);
            connection.setChunkedStreamingMode(0);
            connection.setRequestProperty("Accept", "application/json;text/xml");
            connection.setRequestProperty("Content-Type", "audio/wav; codecs=audio/pcm; samplerate=16000");
            connection.setRequestProperty("Ocp-Apim-Subscription-Key", subscriptionKey);
            connection.setRequestProperty("Pronunciation-Assessment", pronAssessmentParams);

            // send request with chunked data; both streams are closed even if the upload fails
            try (InputStream audioStream = new FileInputStream(new File("../../goodmorning.pcm"));
                 OutputStream requestBody = connection.getOutputStream()) {
                requestBody.write(WAVE_HEADER_16K_16BIT_MONO);

                byte[] audioChunk = new byte[AUDIO_CHUNK_SIZE];
                int chunkSize;
                while ((chunkSize = audioStream.read(audioChunk)) != -1) {
                    Thread.sleep(chunkSize / AUDIO_BYTES_PER_MILLISECOND); // to simulate human speaking rate
                    requestBody.write(audioChunk, 0, chunkSize);
                }
                requestBody.flush();
            }

            long uploadFinishTime = System.currentTimeMillis();

            // receive response; surface the service's error body instead of losing it
            int status = connection.getResponseCode();
            if (status >= 400) {
                String detail = "";
                InputStream errorStream = connection.getErrorStream(); // may be null when no body was sent
                if (errorStream != null) {
                    try (InputStream in = errorStream) {
                        detail = new String(readFully(in), StandardCharsets.UTF_8);
                    }
                }
                throw new IOException("Pronunciation assessment request failed with HTTP " + status + ": " + detail);
            }

            // The content length may be unknown (-1) and a single read may be partial,
            // so the body is read until end of stream instead of into a pre-sized buffer.
            String result; // the result is a JSON, you can parse it with a JSON library
            try (InputStream responseStream = connection.getInputStream()) {
                result = new String(readFully(responseStream), StandardCharsets.UTF_8);
            }

            // take the timestamp before printing so console output is not counted as latency
            long getResponseTime = System.currentTimeMillis();

            System.out.println("Pronunciation assessment result:\n");
            System.out.println(result);
            System.out.println("\nLatency: " + (getResponseTime - uploadFinishTime) + "ms");
        } finally {
            connection.disconnect();
        }
    }

    /**
     * Builds the value of the {@code Pronunciation-Assessment} request header: a JSON
     * document with the reference text, the {@code HundredMark} grading system and the
     * {@code Comprehensive} dimension, encoded as UTF-8 and then Base64.
     *
     * @param referenceText the text the speaker is expected to read; must not be null
     * @return the Base64-encoded JSON parameters
     */
    static String buildAssessmentHeader(String referenceText) {
        String json = "{\"ReferenceText\":\"" + escapeJson(referenceText)
                + "\",\"GradingSystem\":\"HundredMark\",\"Dimension\":\"Comprehensive\"}";
        return Base64.getEncoder().encodeToString(json.getBytes(StandardCharsets.UTF_8));
    }

    /**
     * Escapes a string so it can be placed between double quotes in a JSON document.
     * Quotes, backslashes and control characters below U+0020 are escaped; every other
     * character is copied unchanged.
     *
     * @param text the raw text; must not be null
     * @return the escaped text, without surrounding quotes
     */
    static String escapeJson(String text) {
        StringBuilder escaped = new StringBuilder(text.length() + 8);
        for (int i = 0; i < text.length(); i++) {
            char c = text.charAt(i);
            switch (c) {
                case '"':
                    escaped.append("\\\"");
                    break;
                case '\\':
                    escaped.append("\\\\");
                    break;
                case '\n':
                    escaped.append("\\n");
                    break;
                case '\r':
                    escaped.append("\\r");
                    break;
                case '\t':
                    escaped.append("\\t");
                    break;
                default:
                    if (c < 0x20) {
                        escaped.append(String.format("\\u%04x", (int) c));
                    } else {
                        escaped.append(c);
                    }
            }
        }
        return escaped.toString();
    }

    /**
     * Reads the given stream until end of stream and returns everything that was read.
     * The stream is not closed by this method.
     *
     * @param in the stream to drain
     * @return all bytes read, possibly an empty array
     * @throws IOException if reading from the stream fails
     */
    static byte[] readFully(InputStream in) throws IOException {
        ByteArrayOutputStream collected = new ByteArrayOutputStream();
        byte[] buffer = new byte[4096];
        int count;
        while ((count = in.read(buffer)) != -1) {
            collected.write(buffer, 0, count);
        }
        return collected.toByteArray();
    }
}
0 commit comments