Skip to content

Commit 3c864df

Browse files
yinhewboltomli
authored and committed
Add pronunciation assessment REST API sample code in Node.js
1 parent 22df319 commit 3c864df

File tree

4 files changed

+128
-1
lines changed

4 files changed

+128
-1
lines changed

PronunciationAssessment/Java/jre/src/com/microsoft/cognitiveservices/pronunciationassessment/Sample.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,7 @@ public static void main(String[] args) throws Exception {
5858
byte[] pronAssessmentParamsBase64 = Base64.getEncoder().encode(pronAssessmentParamsJson.getBytes("utf-8"));
5959
String pronAssessmentParams = new String(pronAssessmentParamsBase64, "utf-8");
6060

61-
// build request
61+
// build request (when the code below is re-run within a short time, the connection can be cached and reused, which lowers the connection time cost)
6262
URL url = new URL("https://" + region + ".stt.speech.microsoft.com/speech/recognition/conversation/cognitiveservices/v1?language=en-us");
6363
HttpsURLConnection connection = (HttpsURLConnection) url.openConnection();
6464
connection.setRequestMethod("POST");
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
# Pronunciation Assessment - Node.js Sample
2+
3+
This sample shows how to call the pronunciation assessment REST API from Node.js, uploading the audio data in chunks (streaming).
4+
5+
## Prerequisites
6+
7+
Here's what you'll need to run this sample:
8+
9+
* Your favorite IDE or text editor
10+
* Node.js
11+
* An Azure subscription with Speech Services enabled. [Get one for free!](https://docs.microsoft.com/azure/cognitive-services/speech-service/get-started)
12+
13+
## Code sample
14+
15+
To use this sample, follow these instructions:
16+
17+
* Create a new project using your favorite IDE or text editor
18+
* Copy the code from the sample into a file named `sample.js`
19+
* Run `npm install` to install dependencies
20+
* Set your subscription key and corresponding region
21+
* Run the sample: `node sample.js`
22+
23+
## Resources
24+
25+
* [REST API reference](https://docs.microsoft.com/en-us/azure/cognitive-services/speech-service/rest-speech-to-text)
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
{
2+
"dependencies": {
3+
"fs": "0.0.1-security",
4+
"request": "^2.88.2"
5+
}
6+
}
Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,96 @@
1+
//
2+
// Copyright (c) Microsoft. All rights reserved.
3+
// Licensed under the MIT license.
4+
//
5+
// Microsoft Cognitive Services (formerly Project Oxford): https://www.microsoft.com/cognitive-services
6+
//
7+
// Microsoft Cognitive Services (formerly Project Oxford) GitHub:
8+
// https://github.com/Microsoft/Cognitive-Speech-TTS
9+
//
10+
// Copyright (c) Microsoft Corporation
11+
// All rights reserved.
12+
//
13+
// MIT License:
14+
// Permission is hereby granted, free of charge, to any person obtaining
15+
// a copy of this software and associated documentation files (the
16+
// "Software"), to deal in the Software without restriction, including
17+
// without limitation the rights to use, copy, modify, merge, publish,
18+
// distribute, sublicense, and/or sell copies of the Software, and to
19+
// permit persons to whom the Software is furnished to do so, subject to
20+
// the following conditions:
21+
//
22+
// The above copyright notice and this permission notice shall be
23+
// included in all copies or substantial portions of the Software.
24+
//
25+
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
26+
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
27+
// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
28+
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
29+
// LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
30+
// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
31+
// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
32+
//
33+
34+
const request = require("request");
35+
const fs = require("fs");
36+
37+
// Credentials for the Speech resource; both placeholders must be replaced before running.
const subscriptionKey = "{SubscriptionKey}"; // replace this with your subscription key
const region = "{Region}"; // replace this with the region corresponding to your subscription key, e.g. westus, eastasia

// Build the pronunciation assessment parameters.
// JSON.stringify is used instead of a hand-written template string so the payload
// stays valid JSON even when the reference text contains quotes, backslashes or newlines.
const referenceText = "Good morning.";
const pronAssessmentParamsJson = JSON.stringify({
    ReferenceText: referenceText,
    GradingSystem: "HundredMark",
    Dimension: "Comprehensive"
});
// The JSON is base64-encoded because it is sent as an HTTP header value.
const pronAssessmentParams = Buffer.from(pronAssessmentParamsJson, 'utf-8').toString('base64');
44+
45+
// Build the request: first the HTTP headers, then the options object passed to `request`.
const requestHeaders = {
    'Accept': 'application/json;text/xml',
    'Connection': 'Keep-Alive',
    'Content-Type': 'audio/wav; codecs=audio/pcm; samplerate=16000',
    'Transfer-Encoding': 'chunked', // the audio is streamed, so its total length is not sent up front
    'Expect': '100-continue',
    'Ocp-Apim-Subscription-Key': subscriptionKey,
    'Pronunciation-Assessment': pronAssessmentParams
};

const options = {
    method: 'POST',
    baseUrl: `https://${region}.stt.speech.microsoft.com/`,
    url: 'speech/recognition/conversation/cognitiveservices/v1?language=en-us',
    headers: requestHeaders
};
60+
61+
// Time (ms since epoch) at which the audio upload finished; assigned by the upload code
// and read here to compute the latency between end of upload and first response data.
let uploadFinishTime;

const req = request.post(options);

// Report transport-level failures (DNS, TLS, connection reset, ...) instead of failing silently.
req.on("error", (err) => {
    console.error(`Request failed: ${err.message}`);
});

req.on("response", (resp) => {
    // The body may arrive in several chunks, so collect them all and only
    // decode/print once the response has ended.
    const bodyChunks = [];
    let firstDataTime;

    resp.on("data", (chunk) => {
        if (firstDataTime === undefined) {
            firstDataTime = Date.now();
        }
        bodyChunks.push(Buffer.from(chunk));
    });

    resp.on("end", () => {
        const result = Buffer.concat(bodyChunks).toString('utf-8');

        if (resp.statusCode !== 200) {
            console.error(`Request failed with HTTP status ${resp.statusCode}:\n${result}`);
            return;
        }

        console.log("Pronunciation assessment result:\n");
        console.log(result); // the result is a JSON string, you can parse it with JSON.parse() when consuming it

        const getResponseTime = firstDataTime === undefined ? Date.now() : firstDataTime;
        console.log(`\nLatency = ${getResponseTime - uploadFinishTime}ms`);
    });
});
73+
74+
// A generic WAVE header for 16 kHz, 16-bit, mono PCM audio with a data length of zero.
// The raw audio stream has no header of its own, but the API reads the audio format
// from the header, so this header must be posted as the first chunk of every request.
const waveHeader16K16BitMono = Buffer.from([82, 73, 70, 70, 78, 128, 0, 0, 87, 65, 86, 69, 102, 109, 116, 32, 18, 0, 0, 0, 1, 0, 1, 0, 128, 62, 0, 0, 0, 125, 0, 0, 2, 0, 16, 0, 0, 0, 100, 97, 116, 97, 0, 0, 0, 0]);
req.write(waveHeader16K16BitMono);

// send request with chunked data
const audioStream = fs.createReadStream("../GoodMorning.pcm", { highWaterMark: 1024 });
audioStream.on("data", (data) => {
    // 16000 samples/s * 2 bytes/sample = 32 bytes of audio per millisecond,
    // so each chunk is delayed by roughly its own playback duration.
    sleep(data.length / 32); // to simulate human speaking rate
});
audioStream.on("end", () => {
    uploadFinishTime = Date.now();
});
audioStream.on("error", (err) => {
    // pipe() does not end the destination when the source fails, so abort the
    // request explicitly instead of leaving it open.
    console.error(`Failed to read audio file: ${err.message}`);
    req.abort();
});

audioStream.pipe(req);
89+
90+
/**
 * Synchronously block the current thread for the given duration.
 *
 * Uses Atomics.wait on a private shared buffer, so the thread is parked
 * instead of spinning in a busy loop that consumes a full CPU core.
 *
 * @param {number} milliseconds - How long to block; fractions are allowed.
 *   Zero, negative or NaN values return immediately.
 * @returns {undefined}
 */
function sleep(milliseconds) {
    // Guard first: Atomics.wait would treat a NaN timeout as "wait forever".
    if (!(milliseconds > 0)) {
        return;
    }
    // Nothing ever notifies this private cell, so the wait always ends by timeout.
    const cell = new Int32Array(new SharedArrayBuffer(4));
    Atomics.wait(cell, 0, 0, milliseconds);
}

0 commit comments

Comments
 (0)