Skip to content

Commit 03b03d4

Browse files
author
Botium
authored
Merge pull request #14 from codeforequity-at/develop
BOT-2021 added language queries
2 parents 8d38acd + c66ec98 commit 03b03d4

File tree

8 files changed

+211
-17
lines changed

8 files changed

+211
-17
lines changed

frontend/package.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
"@google-cloud/storage": "^5.7.0",
1717
"@google-cloud/text-to-speech": "^3.1.3",
1818
"body-parser": "^1.19.0",
19+
"cheerio": "^1.0.0-rc.5",
1920
"cross-env": "^7.0.3",
2021
"debug": "^4.3.1",
2122
"dotenv-flow": "^3.2.0",

frontend/src/routes.js

Lines changed: 72 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,8 @@ const debug = require('debug')('botium-speech-processing-routes')
1010

1111
const cachePathStt = process.env.BOTIUM_SPEECH_CACHE_DIR && path.join(process.env.BOTIUM_SPEECH_CACHE_DIR, 'stt')
1212
const cachePathTts = process.env.BOTIUM_SPEECH_CACHE_DIR && path.join(process.env.BOTIUM_SPEECH_CACHE_DIR, 'tts')
13-
const cacheKeyStt = (data, language, ext) => `${crypto.createHash('md5').update(data).digest('hex')}_${language}${ext}`
14-
const cacheKeyTts = (data, language, voice, ext) => `${crypto.createHash('md5').update(data).digest('hex')}_${language}_${voice || 'default'}${ext}`
13+
const cacheKeyStt = (data, language, ext) => sanitize(`${crypto.createHash('md5').update(data).digest('hex')}_${language}${ext}`)
14+
const cacheKeyTts = (data, language, voice, ext) => sanitize(`${crypto.createHash('md5').update(data).digest('hex')}_${language}_${voice || 'default'}${ext}`)
1515

1616
if (cachePathStt) mkdirp.sync(cachePathStt)
1717
if (cachePathTts) mkdirp.sync(cachePathTts)
@@ -64,6 +64,40 @@ router.get('/api/status', (req, res) => {
6464
res.json({ status: 'OK' })
6565
})
6666

67+
/**
68+
* @swagger
69+
* /api/sttlanguages:
70+
* get:
71+
* description: Get list of STT languages
72+
* security:
73+
* - ApiKeyAuth: []
74+
* produces:
75+
* - application/json
76+
* parameters:
77+
* - name: stt
78+
* description: Speech-to-text backend
79+
* in: query
80+
* required: false
81+
* schema:
82+
* type: string
83+
* enum: [kaldi, google]
84+
* responses:
85+
* 200:
86+
* description: List of supported STT languages
87+
* schema:
88+
* type: array
89+
* items:
90+
* type: string
91+
*/
92+
router.get('/api/sttlanguages', async (req, res, next) => {
  try {
    // Engine name comes from the optional "stt" query parameter, falling back
    // to the BOTIUM_SPEECH_PROVIDER_STT environment variable.
    const engineName = (req.query.stt && sanitize(req.query.stt)) || process.env.BOTIUM_SPEECH_PROVIDER_STT
    const stt = sttEngines[engineName]
    // An unknown or unconfigured engine name would otherwise fail with an
    // opaque "cannot read property 'languages' of undefined" TypeError.
    if (!stt || typeof stt.languages !== 'function') {
      throw new Error(`Speech-to-text engine "${engineName}" is not available`)
    }
    res.json(await stt.languages())
  } catch (err) {
    // Hand every failure to the next Express error handler.
    return next(err)
  }
})
100+
67101
/**
68102
* @swagger
69103
* /api/stt/{language}:
@@ -75,7 +109,7 @@ router.get('/api/status', (req, res) => {
75109
* - application/json
76110
* parameters:
77111
* - name: language
78-
* description: ISO-639-1 language code (2 letters)
112+
* description: Language code (as returned from sttlanguages endpoint)
79113
* in: path
80114
* required: true
81115
* schema:
@@ -192,6 +226,40 @@ router.get('/api/ttsvoices', async (req, res, next) => {
192226
}
193227
})
194228

229+
/**
230+
* @swagger
231+
* /api/ttslanguages:
232+
* get:
233+
* description: Get list of TTS languages
234+
* security:
235+
* - ApiKeyAuth: []
236+
* produces:
237+
* - application/json
238+
* parameters:
239+
* - name: tts
240+
* description: Text-to-speech backend
241+
* in: query
242+
* required: false
243+
* schema:
244+
* type: string
245+
* enum: [google, marytts, picotts]
246+
* responses:
247+
* 200:
248+
* description: List of supported TTS languages
249+
* schema:
250+
* type: array
251+
* items:
252+
* type: string
253+
*/
254+
router.get('/api/ttslanguages', async (req, res, next) => {
  try {
    // Engine name comes from the optional "tts" query parameter, falling back
    // to the BOTIUM_SPEECH_PROVIDER_TTS environment variable.
    const engineName = (req.query.tts && sanitize(req.query.tts)) || process.env.BOTIUM_SPEECH_PROVIDER_TTS
    const tts = ttsEngines[engineName]
    // An unknown or unconfigured engine name would otherwise fail with an
    // opaque "cannot read property 'languages' of undefined" TypeError.
    if (!tts || typeof tts.languages !== 'function') {
      throw new Error(`Text-to-speech engine "${engineName}" is not available`)
    }
    res.json(await tts.languages())
  } catch (err) {
    // Hand every failure to the next Express error handler.
    return next(err)
  }
})
262+
195263
/**
196264
* @swagger
197265
* /api/tts/{language}:
@@ -203,7 +271,7 @@ router.get('/api/ttsvoices', async (req, res, next) => {
203271
* - audio/wav
204272
* parameters:
205273
* - name: language
206-
* description: ISO-639-1 language code (2 letters)
274+
* description: Language code (as returned from ttslanguages endpoint)
207275
* in: path
208276
* required: true
209277
* schema:

frontend/src/stt/google.js

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,40 @@
1+
const _ = require('lodash')
12
const { v1: uuidv1 } = require('uuid')
23
const speech = process.env.BOTIUM_SPEECH_GOOGLE_API_VERSION ? require('@google-cloud/speech')[process.env.BOTIUM_SPEECH_GOOGLE_API_VERSION] : require('@google-cloud/speech')
34
const storage = require('@google-cloud/storage')
5+
const request = require('request-promise-native')
6+
const cheerio = require('cheerio')
47
const debug = require('debug')('botium-speech-processing-google-stt')
58

69
const { googleOptions } = require('../utils')
710

11+
const GOOGLE_STT_LANGUAGES_URL = 'https://cloud.google.com/speech-to-text/docs/languages'
12+
/**
 * Fetches the page at GOOGLE_STT_LANGUAGES_URL and collects the trimmed text
 * of the second cell of every row of the language table.
 *
 * @returns {Promise<string[]>} Non-empty cell texts in document order
 *   (duplicates are not removed here).
 */
const downloadLanguageCodes = async () => {
  debug(`Downloading language codes from ${GOOGLE_STT_LANGUAGES_URL}`)
  const $ = cheerio.load(await request(GOOGLE_STT_LANGUAGES_URL))

  const codes = []
  $('#lang-table-container table tbody tr').each((rowIndex, row) => {
    // Second <td> of the row; a row without it yields an empty string.
    const code = $(row).find('td').eq(1).text().trim()
    if (code) {
      codes.push(code)
    }
  })
  return codes
}
27+
28+
let languageCodes = null
29+
830
class GoogleSTT {
31+
async languages () {
32+
if (!languageCodes) {
33+
languageCodes = _.uniq(await downloadLanguageCodes()).sort()
34+
}
35+
return languageCodes
36+
}
37+
938
async stt ({ language, buffer }) {
1039
const speechClient = new speech.SpeechClient(googleOptions())
1140
const storageClient = new storage.Storage(googleOptions())

frontend/src/stt/kaldi.js

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,15 @@
11
const util = require('util')
2+
const _ = require('lodash')
23
const Mustache = require('mustache')
34
const request = require('request-promise-native')
45
const debug = require('debug')('botium-speech-processing-kaldi')
56

67
class KaldiSTT {
8+
async languages () {
9+
const envKeys = Object.keys(process.env).filter(k => k.startsWith('BOTIUM_SPEECH_KALDI_URL_'))
10+
return _.uniq(envKeys.map(k => k.split('_')[4].toLowerCase())).sort()
11+
}
12+
713
async stt ({ language, buffer }) {
814
const envVarUrl = `BOTIUM_SPEECH_KALDI_URL_${language.toUpperCase()}`
915
if (!process.env[envVarUrl]) throw new Error(`Environment variable ${envVarUrl} empty`)

frontend/src/swagger.json

Lines changed: 81 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,45 @@
3636
}
3737
}
3838
},
39+
"/api/sttlanguages": {
40+
"get": {
41+
"description": "Get list of STT languages",
42+
"security": [
43+
{
44+
"ApiKeyAuth": []
45+
}
46+
],
47+
"produces": [
48+
"application/json"
49+
],
50+
"parameters": [
51+
{
52+
"name": "stt",
53+
"description": "Speech-to-text backend",
54+
"in": "query",
55+
"required": false,
56+
"schema": {
57+
"type": "string",
58+
"enum": [
59+
"kaldi",
60+
"google"
61+
]
62+
}
63+
}
64+
],
65+
"responses": {
66+
"200": {
67+
"description": "List of supported STT languages",
68+
"schema": {
69+
"type": "array",
70+
"items": {
71+
"type": "string"
72+
}
73+
}
74+
}
75+
}
76+
}
77+
},
3978
"/api/stt/{language}": {
4079
"post": {
4180
"description": "Convert audio file to text",
@@ -50,7 +89,7 @@
5089
"parameters": [
5190
{
5291
"name": "language",
53-
"description": "ISO-639-1 language code (2 letters)",
92+
"description": "Language code (as returned from sttlanguages endpoint)",
5493
"in": "path",
5594
"required": true,
5695
"schema": {
@@ -160,6 +199,46 @@
160199
}
161200
}
162201
},
202+
"/api/ttslanguages": {
203+
"get": {
204+
"description": "Get list of TTS languages",
205+
"security": [
206+
{
207+
"ApiKeyAuth": []
208+
}
209+
],
210+
"produces": [
211+
"application/json"
212+
],
213+
"parameters": [
214+
{
215+
"name": "tts",
216+
"description": "Text-to-speech backend",
217+
"in": "query",
218+
"required": false,
219+
"schema": {
220+
"type": "string",
221+
"enum": [
222+
"google",
223+
"marytts",
224+
"picotts"
225+
]
226+
}
227+
}
228+
],
229+
"responses": {
230+
"200": {
231+
"description": "List of supported TTS languages",
232+
"schema": {
233+
"type": "array",
234+
"items": {
235+
"type": "string"
236+
}
237+
}
238+
}
239+
}
240+
}
241+
},
163242
"/api/tts/{language}": {
164243
"get": {
165244
"description": "Convert text file to audio",
@@ -174,7 +253,7 @@
174253
"parameters": [
175254
{
176255
"name": "language",
177-
"description": "ISO-639-1 language code (2 letters)",
256+
"description": "Language code (as returned from ttslanguages endpoint)",
178257
"in": "path",
179258
"required": true,
180259
"schema": {

frontend/src/tts/google.js

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
const _ = require('lodash')
12
const textToSpeech = require('@google-cloud/text-to-speech')
23
const debug = require('debug')('botium-speech-processing-google-tts')
34

@@ -26,13 +27,18 @@ class GoogleTTS {
2627
googleVoices.push({
2728
name: voice.name,
2829
gender: genderMap[voice.ssmlGender],
29-
language: languageCode.split('-')[0]
30+
language: languageCode
3031
})
3132
})
3233
})
3334
return googleVoices
3435
}
3536

37+
async languages () {
38+
const voicesList = await this.voices()
39+
return _.uniq(voicesList.map(v => v.language)).sort()
40+
}
41+
3642
async tts ({ language, voice, text }) {
3743
const voiceSelector = {
3844
languageCode: language

frontend/src/tts/marytts.js

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,11 +35,16 @@ class MaryTTS {
3535
return maryVoices
3636
}
3737

38+
async languages () {
39+
const voicesList = await this.voices()
40+
return _.uniq(voicesList.map(v => v.language)).sort()
41+
}
42+
3843
async tts ({ language, voice, text }) {
3944
const voicesList = await this.voices()
4045

4146
const maryVoice = voicesList.find(v => {
42-
if (language && v.language !== language) return false
47+
if (language && !v.language.startsWith(language)) return false
4348
if (voice && v.name !== voice) return false
4449
return true
4550
})

frontend/src/tts/picotts.js

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
const fs = require('fs')
2+
const _ = require('lodash')
23
const { spawn } = require('child_process')
34
const { v1: uuidv1 } = require('uuid')
45
const debug = require('debug')('botium-speech-processing-picotts')
@@ -7,13 +8,13 @@ const { ttsFilename } = require('../utils')
78

89
const voicesList = [
910
{
10-
name: 'en-EN',
11-
language: 'en',
11+
name: 'en-US',
12+
language: 'en-US',
1213
gender: 'neutral'
1314
},
1415
{
1516
name: 'en-GB',
16-
language: 'en',
17+
language: 'en-GB',
1718
gender: 'neutral'
1819
},
1920
{
@@ -26,11 +27,6 @@ const voicesList = [
2627
language: 'de',
2728
gender: 'neutral'
2829
},
29-
{
30-
name: 'en-GB',
31-
language: 'en',
32-
gender: 'neutral'
33-
},
3430
{
3531
name: 'fr-FR',
3632
language: 'fr',
@@ -48,9 +44,13 @@ class PicoTTS {
4844
return voicesList
4945
}
5046

47+
async languages () {
48+
return _.uniq(voicesList.map(v => v.language)).sort()
49+
}
50+
5151
async tts ({ language, voice, text }) {
5252
const picoVoice = voicesList.find(v => {
53-
if (language && v.language !== language) return false
53+
if (language && !v.language.startsWith(language)) return false
5454
if (voice && v.name !== voice) return false
5555
return true
5656
})

0 commit comments

Comments
 (0)