Skip to content

Commit 22df319

Browse files
authored
Add pronunciation assessment REST API sample code on Java (#180)
* Add pronunciation assessment REST API sample code on Java * Add a comment * Put a new line at the end of the file * Make tab alignment
1 parent c88c400 commit 22df319

File tree

4 files changed

+151
-0
lines changed

4 files changed

+151
-0
lines changed
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
<?xml version="1.0" encoding="UTF-8"?>
2+
<classpath>
3+
<classpathentry kind="src" path="src"/>
4+
<classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER"/>
5+
<classpathentry kind="output" path="bin"/>
6+
</classpath>
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
<?xml version="1.0" encoding="UTF-8"?>
2+
<projectDescription>
3+
<name>PronunciationAssessment</name>
4+
<comment></comment>
5+
<projects>
6+
</projects>
7+
<buildSpec>
8+
<buildCommand>
9+
<name>org.eclipse.jdt.core.javabuilder</name>
10+
<arguments>
11+
</arguments>
12+
</buildCommand>
13+
</buildSpec>
14+
<natures>
15+
<nature>org.eclipse.jdt.core.javanature</nature>
16+
</natures>
17+
</projectDescription>
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
# Pronunciation Assessment - Java Run-Time Environment (JRE) Samples
2+
3+
This sample shows how to call the pronunciation assessment REST API from Java, uploading the audio as chunked (streaming) data.
4+
5+
## Prerequisites
6+
7+
* An Azure subscription with Speech Services enabled. [Get one for free!](https://docs.microsoft.com/azure/cognitive-services/speech-service/get-started)
8+
* 64-bit JRE/JDK for Java 8.
9+
* Version 4.15 of [Eclipse](https://www.eclipse.org), 64-bit.
10+
11+
## Code sample
12+
13+
To use this sample, follow these instructions:
14+
15+
* Clone this repository.
16+
* Open this project in Eclipse.
17+
* Locate `"{SubscriptionKey}"` and replace it with your Speech Services subscription key.
18+
* Locate `"{Region}"` and replace it with the region corresponding to your subscription key.
19+
* Run the project and check the result on the console.
20+
21+
## Resources
22+
23+
* [REST API reference](https://docs.microsoft.com/en-us/azure/cognitive-services/speech-service/rest-speech-to-text)
Lines changed: 105 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,105 @@
1+
//
2+
// Copyright (c) Microsoft. All rights reserved.
3+
// Licensed under the MIT license.
4+
//
5+
// Microsoft Cognitive Services (formerly Project Oxford): https://www.microsoft.com/cognitive-services
6+
//
7+
// Microsoft Cognitive Services (formerly Project Oxford) GitHub:
8+
// https://github.com/Microsoft/Cognitive-Speech-TTS
9+
//
10+
// Copyright (c) Microsoft Corporation
11+
// All rights reserved.
12+
//
13+
// MIT License:
14+
// Permission is hereby granted, free of charge, to any person obtaining
15+
// a copy of this software and associated documentation files (the
16+
// "Software"), to deal in the Software without restriction, including
17+
// without limitation the rights to use, copy, modify, merge, publish,
18+
// distribute, sublicense, and/or sell copies of the Software, and to
19+
// permit persons to whom the Software is furnished to do so, subject to
20+
// the following conditions:
21+
//
22+
// The above copyright notice and this permission notice shall be
23+
// included in all copies or substantial portions of the Software.
24+
//
25+
// THE SOFTWARE IS PROVIDED ""AS IS"", WITHOUT WARRANTY OF ANY KIND,
26+
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
27+
// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
28+
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
29+
// LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
30+
// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
31+
// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
32+
//
33+
34+
package com.microsoft.cognitiveservices.pronunciationassessment;
35+
36+
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.util.Base64;

import javax.net.ssl.HttpsURLConnection;
44+
45+
/**
 * Sample that posts a raw PCM audio file to the speech recognition REST endpoint with
 * pronunciation assessment enabled, uploading the audio as chunked (streaming) data, and
 * prints the JSON result together with the latency between upload end and response.
 */
public class Sample {

    /**
     * A common wave header (RIFF/WAVE, PCM, 1 channel, 16000 Hz, 16 bits per sample) with zero
     * audio length. The streamed data does not contain a header, but the API requires one to
     * fetch format information, so this header is posted as the first chunk of each query.
     */
    private static final byte[] WAVE_HEADER_16K_16BIT_MONO = new byte[] {
        82, 73, 70, 70, 78, (byte) 128, 0, 0, 87, 65, 86, 69, 102, 109, 116, 32,
        18, 0, 0, 0, 1, 0, 1, 0, (byte) 128, 62, 0, 0, 0, 125, 0, 0,
        2, 0, 16, 0, 0, 0, 100, 97, 116, 97, 0, 0, 0, 0
    };

    /**
     * Runs the sample: uploads {@code ../../goodmorning.pcm} and prints the assessment result.
     *
     * @param args unused
     * @throws Exception if the audio file cannot be read, the request fails, or the thread is
     *                   interrupted while pacing the upload
     */
    public static void main(String[] args) throws Exception {

        String subscriptionKey = "{SubscriptionKey}"; // replace this with your subscription key
        String region = "{Region}"; // replace this with the region corresponding to your subscription key, e.g. westus, eastasia

        // build pronunciation assessment parameters
        String referenceText = "Good morning.";
        String pronAssessmentParams = buildPronunciationAssessmentHeader(referenceText);

        // build request
        URL url = new URL("https://" + region + ".stt.speech.microsoft.com/speech/recognition/conversation/cognitiveservices/v1?language=en-us");
        HttpsURLConnection connection = (HttpsURLConnection) url.openConnection();
        try {
            connection.setRequestMethod("POST");
            connection.setDoOutput(true);
            connection.setChunkedStreamingMode(0);
            connection.setRequestProperty("Accept", "application/json;text/xml");
            connection.setRequestProperty("Content-Type", "audio/wav; codecs=audio/pcm; samplerate=16000");
            connection.setRequestProperty("Ocp-Apim-Subscription-Key", subscriptionKey);
            connection.setRequestProperty("Pronunciation-Assessment", pronAssessmentParams);

            // send request with chunked data; try-with-resources closes both streams even on failure
            File file = new File("../../goodmorning.pcm");
            try (InputStream fileStream = new FileInputStream(file);
                 OutputStream outputStream = connection.getOutputStream()) {
                outputStream.write(WAVE_HEADER_16K_16BIT_MONO);

                byte[] audioChunk = new byte[1024];
                int chunkSize = fileStream.read(audioChunk);
                while (chunkSize > 0) {
                    // 16 kHz * 16 bit mono is 32 bytes per millisecond: sleep to simulate human speaking rate
                    Thread.sleep(chunkSize / 32);
                    outputStream.write(audioChunk, 0, chunkSize);
                    chunkSize = fileStream.read(audioChunk);
                }
                outputStream.flush();
            }

            long uploadFinishTime = System.currentTimeMillis();

            // receive response; read until end of stream because the content length may be
            // unknown (-1) and a single read() call is not guaranteed to return the whole body
            byte[] responseBytes;
            try (InputStream inputStream = connection.getInputStream()) {
                responseBytes = readFully(inputStream);
            }

            // take the timestamp before printing so console output does not count as latency
            long getResponseTime = System.currentTimeMillis();

            String result = new String(responseBytes, StandardCharsets.UTF_8); // the result is a JSON, you can parse it with a JSON library

            System.out.println("Pronunciation assessment result:\n");
            System.out.println(result);

            System.out.println("\nLatency: " + (getResponseTime - uploadFinishTime) + "ms");
        } finally {
            connection.disconnect();
        }
    }

    /**
     * Builds the value of the {@code Pronunciation-Assessment} request header: a JSON object
     * holding the reference text, grading system and dimension, encoded as Base64 over UTF-8.
     *
     * @param referenceText the text the speaker is expected to read; escaped for JSON
     * @return the Base64-encoded JSON parameters
     */
    static String buildPronunciationAssessmentHeader(String referenceText) {
        String json = "{\"ReferenceText\":\"" + escapeJson(referenceText)
                + "\",\"GradingSystem\":\"HundredMark\",\"Dimension\":\"Comprehensive\"}";
        return Base64.getEncoder().encodeToString(json.getBytes(StandardCharsets.UTF_8));
    }

    /**
     * Escapes a string so it can be embedded inside a JSON string literal.
     *
     * @param text the raw text
     * @return the text with quotes, backslashes and control characters escaped
     */
    static String escapeJson(String text) {
        StringBuilder escaped = new StringBuilder(text.length() + 16);
        for (int i = 0; i < text.length(); i++) {
            char c = text.charAt(i);
            switch (c) {
                case '"':
                    escaped.append("\\\"");
                    break;
                case '\\':
                    escaped.append("\\\\");
                    break;
                case '\n':
                    escaped.append("\\n");
                    break;
                case '\r':
                    escaped.append("\\r");
                    break;
                case '\t':
                    escaped.append("\\t");
                    break;
                default:
                    if (c < 0x20) {
                        escaped.append(String.format("\\u%04x", (int) c));
                    } else {
                        escaped.append(c);
                    }
                    break;
            }
        }
        return escaped.toString();
    }

    /**
     * Reads a stream until end of input.
     *
     * @param in the stream to drain; not closed by this method
     * @return every byte read from the stream, possibly empty
     * @throws IOException if reading fails
     */
    static byte[] readFully(InputStream in) throws IOException {
        ByteArrayOutputStream collected = new ByteArrayOutputStream();
        byte[] chunk = new byte[4096];
        int read;
        while ((read = in.read(chunk)) != -1) {
            collected.write(chunk, 0, read);
        }
        return collected.toByteArray();
    }
}

0 commit comments

Comments
 (0)