@@ -3,7 +3,9 @@ const path = require('path')
33const mkdirp = require ( 'mkdirp' )
44const crypto = require ( 'crypto' )
55const express = require ( 'express' )
6+ const sanitize = require ( 'sanitize-filename' )
67const { runsox } = require ( './convert/sox.js' )
8+ const { wer } = require ( './utils' )
79const debug = require ( 'debug' ) ( 'botium-speech-processing-routes' )
810
// Directory for cached speech-to-text results: the 'stt' subfolder of
// BOTIUM_SPEECH_CACHE_DIR. Falsy when that environment variable is not set;
// the STT route only runs its cache step when this value is truthy.
911const cachePathStt = process . env . BOTIUM_SPEECH_CACHE_DIR && path . join ( process . env . BOTIUM_SPEECH_CACHE_DIR , 'stt' )
@@ -15,9 +17,6 @@ if (cachePathTts) mkdirp.sync(cachePathTts)
1517
1618const router = express . Router ( )
1719
18- const tts = new ( require ( `./tts/${ process . env . BOTIUM_SPEECH_PROVIDER_TTS } ` ) ) ( )
19- const stt = new ( require ( `./stt/${ process . env . BOTIUM_SPEECH_PROVIDER_STT } ` ) ) ( )
20-
2120/**
2221 * @swagger
2322 * components:
@@ -66,6 +65,19 @@ router.get('/api/status', (req, res) => {
6665 * required: true
6766 * schema:
6867 * type: string
68+ * - name: hint
69+ * description: Hint text for calculating the Levenshtein edit distance for the result text (word error rate)
70+ * in: query
71+ * required: false
72+ * schema:
73+ * type: string
74+ * - name: stt
75+ * description: Speech-to-text backend
76+ * in: query
77+ * required: false
78+ * schema:
79+ * type: string
80+ * enum: [kaldi, google]
6981 * requestBody:
7082 * description: Audio file
7183 * content:
@@ -97,10 +109,15 @@ router.post('/api/stt/:language', async (req, res, next) => {
97109 }
98110 }
99111 try {
112+ const stt = new ( require ( `./stt/${ ( req . query . stt && sanitize ( req . query . stt ) ) || process . env . BOTIUM_SPEECH_PROVIDER_STT } ` ) ) ( )
113+
100114 const result = await stt . stt ( {
101115 language : req . params . language ,
102116 buffer : req . body
103117 } )
118+ if ( req . query . hint ) {
119+ result . wer = await wer ( req . query . hint , result . text )
120+ }
104121 res . json ( result ) . end ( )
105122
106123 if ( cachePathStt ) {
@@ -137,6 +154,13 @@ router.post('/api/stt/:language', async (req, res, next) => {
137154 * required: true
138155 * schema:
139156 * type: string
157+ * - name: tts
158+ * description: Text-to-speech backend
159+ * in: query
160+ * required: false
161+ * schema:
162+ * type: string
163+ * enum: [marytts, picotts]
140164 * responses:
141165 * 200:
142166 * description: Audio file
@@ -169,6 +193,8 @@ router.get('/api/tts/:language', async (req, res, next) => {
169193 }
170194 }
171195 try {
196+ const tts = new ( require ( `./tts/${ ( req . query . tts && sanitize ( req . query . tts ) ) || process . env . BOTIUM_SPEECH_PROVIDER_TTS } ` ) ) ( )
197+
172198 const { buffer, name } = await tts . tts ( {
173199 language : req . params . language ,
174200 text : req . query . text
@@ -249,4 +275,40 @@ router.post('/api/convert/:profile', async (req, res, next) => {
249275 }
250276} )
251277
278+ /**
279+ * @swagger
280+ * /api/wer:
281+ * get:
282+ * description: Calculate Levenshtein edit distance between two strings (word error rate)
283+ * security:
284+ * - ApiKeyAuth: []
285+ * produces:
286+ * - application/json
287+ * parameters:
288+ * - name: text1
289+ * description: Text
290+ * in: query
291+ * required: true
292+ * schema:
293+ * type: string
294+ * - name: text2
295+ * description: Text
296+ * in: query
297+ * required: true
298+ * schema:
299+ * type: string
300+ * responses:
301+ * 200:
302+ * description: Levenshtein Edit Distance on word level
303+ * schema:
304+ * properties:
305+ * distance:
306+ * type: integer
307+ * wer:
308+ * type: number
309+ */
// GET /api/wer - responds with the JSON result of wer(text1, text2), where both
// texts are taken from the query string.
// Any failure is handed to next(): an async handler that rejects is not caught
// by Express 4 on its own, which would leave the request without a response.
// NOTE(review): text1/text2 are declared required in the swagger spec but are
// not validated here - confirm how wer() treats undefined input.
router.get('/api/wer', async (req, res, next) => {
  try {
    res.json(await wer(req.query.text1, req.query.text2))
  } catch (err) {
    next(err)
  }
})
313+
252314module . exports = router
0 commit comments