Skip to content

Commit 115d0d4

Browse files
author
Botium
authored
Merge pull request #7 from codeforequity-at/features/calculate-word-error-rate
features/calculate-word-error-rate
2 parents 71ed798 + d4d70c7 commit 115d0d4

File tree

5 files changed

+164
-4
lines changed

5 files changed

+164
-4
lines changed

frontend/package.json

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,10 +25,12 @@
2525
"nodemon": "^2.0.1",
2626
"request": "^2.88.0",
2727
"request-promise-native": "^1.0.8",
28+
"sanitize-filename": "^1.6.3",
2829
"swagger-jsdoc": "^3.5.0",
2930
"swagger-ui-express": "^4.1.2",
3031
"uuid": "^3.3.3",
31-
"winston": "^3.2.1"
32+
"winston": "^3.2.1",
33+
"word-error-rate": "0.0.7"
3234
},
3335
"devDependencies": {
3436
"eslint": "^6.7.2",

frontend/src/routes.js

Lines changed: 65 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,9 @@ const path = require('path')
33
const mkdirp = require('mkdirp')
44
const crypto = require('crypto')
55
const express = require('express')
6+
const sanitize = require('sanitize-filename')
67
const { runsox } = require('./convert/sox.js')
8+
const { wer } = require('./utils')
79
const debug = require('debug')('botium-speech-processing-routes')
810

911
const cachePathStt = process.env.BOTIUM_SPEECH_CACHE_DIR && path.join(process.env.BOTIUM_SPEECH_CACHE_DIR, 'stt')
@@ -15,9 +17,6 @@ if (cachePathTts) mkdirp.sync(cachePathTts)
1517

1618
const router = express.Router()
1719

18-
const tts = new (require(`./tts/${process.env.BOTIUM_SPEECH_PROVIDER_TTS}`))()
19-
const stt = new (require(`./stt/${process.env.BOTIUM_SPEECH_PROVIDER_STT}`))()
20-
2120
/**
2221
* @swagger
2322
* components:
@@ -66,6 +65,19 @@ router.get('/api/status', (req, res) => {
6665
* required: true
6766
* schema:
6867
* type: string
68+
* - name: hint
69+
* description: Hint text for calculating the Levenshtein edit distance for the result text (word error rate)
70+
* in: query
71+
* required: false
72+
* schema:
73+
* type: string
74+
* - name: stt
75+
* description: Speech-to-text backend
76+
* in: query
77+
* required: false
78+
* schema:
79+
* type: string
80+
* enum: [kaldi, google]
6981
* requestBody:
7082
* description: Audio file
7183
* content:
@@ -97,10 +109,15 @@ router.post('/api/stt/:language', async (req, res, next) => {
97109
}
98110
}
99111
try {
112+
const stt = new (require(`./stt/${(req.query.stt && sanitize(req.query.stt)) || process.env.BOTIUM_SPEECH_PROVIDER_STT}`))()
113+
100114
const result = await stt.stt({
101115
language: req.params.language,
102116
buffer: req.body
103117
})
118+
if (req.query.hint) {
119+
result.wer = await wer(req.query.hint, result.text)
120+
}
104121
res.json(result).end()
105122

106123
if (cachePathStt) {
@@ -137,6 +154,13 @@ router.post('/api/stt/:language', async (req, res, next) => {
137154
* required: true
138155
* schema:
139156
* type: string
157+
* - name: tts
158+
* description: Text-to-speech backend
159+
* in: query
160+
* required: false
161+
* schema:
162+
* type: string
163+
* enum: [marytts, picotts]
140164
* responses:
141165
* 200:
142166
* description: Audio file
@@ -169,6 +193,8 @@ router.get('/api/tts/:language', async (req, res, next) => {
169193
}
170194
}
171195
try {
196+
const tts = new (require(`./tts/${(req.query.tts && sanitize(req.query.tts)) || process.env.BOTIUM_SPEECH_PROVIDER_TTS}`))()
197+
172198
const { buffer, name } = await tts.tts({
173199
language: req.params.language,
174200
text: req.query.text
@@ -249,4 +275,40 @@ router.post('/api/convert/:profile', async (req, res, next) => {
249275
}
250276
})
251277

278+
/**
279+
* @swagger
280+
* /api/wer:
281+
* get:
282+
* description: Calculate Levenshtein edit distance between two strings (word error rate)
283+
* security:
284+
* - ApiKeyAuth: []
285+
* produces:
286+
* - application/json
287+
* parameters:
288+
* - name: text1
289+
* description: Text
290+
* in: query
291+
* required: true
292+
* schema:
293+
* type: string
294+
* - name: text2
295+
* description: Text
296+
* in: query
297+
* required: true
298+
* schema:
299+
* type: string
300+
* responses:
301+
* 200:
302+
* description: Levenshtein Edit Distance on word level
303+
* schema:
304+
* properties:
305+
* distance:
306+
* type: integer
307+
* wer:
308+
* type: number
309+
*/
310+
// GET /api/wer - responds with the JSON object produced by wer() for the
// "text1" and "text2" query parameters.
router.get('/api/wer', async (req, res, next) => {
  try {
    res.json(await wer(req.query.text1, req.query.text2))
  } catch (err) {
    // Hand failures to the Express error pipeline, like the other async
    // routes in this file; without this a rejected wer() call would surface
    // as an unhandled rejection and the client would never get a response.
    next(err)
  }
})
313+
252314
module.exports = router

frontend/src/stt/kaldi.js

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ class Kaldi {
1818

1919
let response
2020
try {
21+
debug(`Calling kaldi url ${requestOptions.uri} ...`)
2122
response = await request(requestOptions)
2223
} catch (err) {
2324
throw new Error(`Calling url ${requestOptions.uri} failed: ${err.message}`)

frontend/src/swagger.json

Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,28 @@
5656
"schema": {
5757
"type": "string"
5858
}
59+
},
60+
{
61+
"name": "hint",
62+
"description": "Hint text for calculating the Levenshtein edit distance for the result text (word error rate)",
63+
"in": "query",
64+
"required": false,
65+
"schema": {
66+
"type": "string"
67+
}
68+
},
69+
{
70+
"name": "stt",
71+
"description": "Speech-to-text backend",
72+
"in": "query",
73+
"required": false,
74+
"schema": {
75+
"type": "string",
76+
"enum": [
77+
"kaldi",
78+
"google"
79+
]
80+
}
5981
}
6082
],
6183
"requestBody": {
@@ -112,6 +134,19 @@
112134
"schema": {
113135
"type": "string"
114136
}
137+
},
138+
{
139+
"name": "tts",
140+
"description": "Text-to-speech backend",
141+
"in": "query",
142+
"required": false,
143+
"schema": {
144+
"type": "string",
145+
"enum": [
146+
"marytts",
147+
"picotts"
148+
]
149+
}
115150
}
116151
],
117152
"responses": {
@@ -176,6 +211,54 @@
176211
}
177212
}
178213
}
214+
},
215+
"/api/wer": {
216+
"get": {
217+
"description": "Calculate Levenshtein edit distance between two strings (word error rate)",
218+
"security": [
219+
{
220+
"ApiKeyAuth": []
221+
}
222+
],
223+
"produces": [
224+
"application/json"
225+
],
226+
"parameters": [
227+
{
228+
"name": "text1",
229+
"description": "Text",
230+
"in": "query",
231+
"required": true,
232+
"schema": {
233+
"type": "string"
234+
}
235+
},
236+
{
237+
"name": "text2",
238+
"description": "Text",
239+
"in": "query",
240+
"required": true,
241+
"schema": {
242+
"type": "string"
243+
}
244+
}
245+
],
246+
"responses": {
247+
"200": {
248+
"description": "Levenshtein Edit Distance on word level",
249+
"schema": {
250+
"properties": {
251+
"distance": {
252+
"type": "integer"
253+
},
254+
"wer": {
255+
"type": "number"
256+
}
257+
}
258+
}
259+
}
260+
}
261+
}
179262
}
180263
},
181264
"components": {

frontend/src/utils.js

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
const speechScorer = require('word-error-rate')
2+
3+
/**
 * Scores two texts against each other using the word-error-rate package.
 * A falsy argument (undefined, null, empty string, ...) is replaced by an
 * empty string before it is handed to the scorer.
 *
 * @param {string} text1 - first text, passed as the scorer's first argument
 * @param {string} text2 - second text, passed as the scorer's second argument
 * @returns {Promise<{distance: *, wer: *}>} result of calculateEditDistance
 *   ("distance") and of wordErrorRate ("wer") for the two texts
 */
const wer = async (text1, text2) => {
  const first = text1 || ''
  const second = text2 || ''

  const distance = speechScorer.calculateEditDistance(first, second)
  const rate = speechScorer.wordErrorRate(first, second)

  return { distance, wer: rate }
}
9+
10+
// Public surface of this module: only the wer() helper is exported.
module.exports = { wer }

0 commit comments

Comments
 (0)