Skip to content

Commit 570097c

Browse files
author
Florian Treml
committed
BOT-1862 added support for webm
1 parent 5d55926 commit 570097c

File tree

7 files changed

+142
-95
lines changed

7 files changed

+142
-95
lines changed

docker-compose-dev.yml

Lines changed: 23 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -22,30 +22,30 @@ services:
2222
# volumes:
2323
# - "./watcher:/app/watch"
2424
# - "./logs/watcher:/app/logs"
25-
# stt_en:
26-
# build:
27-
# context: stt
28-
# dockerfile: Dockerfile.kaldi.en
29-
# image: botium/botium-speech-kaldi-en:develop
30-
# restart: always
31-
# volumes:
32-
# - "./logs/stt_en:/opt/logs"
33-
# stt_de:
34-
# build:
35-
# context: stt
36-
# dockerfile: Dockerfile.kaldi.de
37-
# image: botium/botium-speech-kaldi-de:develop
38-
# restart: always
39-
# volumes:
40-
# - "./logs/stt_de:/opt/logs"
41-
tts:
25+
stt_en:
4226
build:
43-
context: tts
44-
dockerfile: Dockerfile.marytts
45-
image: botium/botium-speech-marytts:develop
27+
context: stt
28+
dockerfile: Dockerfile.kaldi.en
29+
image: botium/botium-speech-kaldi-en:develop
4630
restart: always
47-
# dictate:
31+
volumes:
32+
- "./logs/stt_en:/opt/logs"
33+
stt_de:
34+
build:
35+
context: stt
36+
dockerfile: Dockerfile.kaldi.de
37+
image: botium/botium-speech-kaldi-de:develop
38+
restart: always
39+
volumes:
40+
- "./logs/stt_de:/opt/logs"
41+
# tts:
4842
# build:
49-
# context: dictate
50-
# image: botium/botium-speech-dictate:develop
43+
# context: tts
44+
# dockerfile: Dockerfile.marytts
45+
# image: botium/botium-speech-marytts:develop
5146
# restart: always
47+
dictate:
48+
build:
49+
context: dictate
50+
image: botium/botium-speech-dictate:develop
51+
restart: always

frontend/Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
FROM ubuntu:18.04
22

33
RUN apt-get update && apt-get -y install curl gnupg && curl -sL https://deb.nodesource.com/setup_14.x | bash - && apt-get -y install nodejs
4-
RUN apt-get install -y dos2unix sox libsox-fmt-mp3 libttspico-utils
4+
RUN apt-get install -y dos2unix sox libsox-fmt-mp3 libttspico-utils ffmpeg
55

66
WORKDIR /app
77
COPY ./package.json /app/package.json

frontend/resources/.env

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,9 @@ BOTIUM_SPEECH_GOOGLE_CONFIG={}
3939
#BOTIUM_SPEECH_GOOGLE_API_VERSION=
4040

4141
# WAV Conversion Command Line
42-
BOTIUM_SPEECH_CONVERT_PROFILE_WAVTOMONOWAV_SOX=sox -t wav - -r 16k -t wav -c 1 -b 16 -e signed {{{output}}}
42+
BOTIUM_SPEECH_CONVERT_PROFILE_WAVTOMONOWAV_CMD=sox -t wav - -r 16k -t wav -c 1 -b 16 -e signed {{{output}}}
4343
BOTIUM_SPEECH_CONVERT_PROFILE_WAVTOMONOWAV_OUTPUT=output.wav
44-
BOTIUM_SPEECH_CONVERT_PROFILE_MP3TOMONOWAV_SOX=sox -t mp3 - -r 16k -t wav -c 1 -b 16 -e signed {{{output}}}
45-
BOTIUM_SPEECH_CONVERT_PROFILE_MP3TOMONOWAV_OUTPUT=output.wav
44+
BOTIUM_SPEECH_CONVERT_PROFILE_MP3TOMONOWAV_CMD=sox -t mp3 - -r 16k -t wav -c 1 -b 16 -e signed {{{output}}}
45+
BOTIUM_SPEECH_CONVERT_PROFILE_MP3TOMONOWAV_OUTPUT=output.wav
46+
BOTIUM_SPEECH_CONVERT_PROFILE_WEBMTOMONOWAV_CMD=ffmpeg -i - -f wav - | sox -t wav - -r 16k -t wav -c 1 -b 16 -e signed {{{output}}}
47+
BOTIUM_SPEECH_CONVERT_PROFILE_WEBMTOMONOWAV_OUTPUT=output.wav

frontend/src/convert/convert.js

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
const fs = require('fs')
2+
const Mustache = require('mustache')
3+
const { spawn } = require('child_process')
4+
const { v1: uuidv1 } = require('uuid')
5+
const debug = require('debug')('botium-speech-processing-convert')
6+
7+
// Characters allowed in a trim position that is placed on the shell command
// line: digits, ':' and '.' (time notation), a leading '=', '+' or '-' anchor
// and the 's' / 't' unit suffixes. Anything else (whitespace, quotes, ';',
// '$', '|', ...) is rejected so that request parameters cannot inject shell
// syntax. NOTE(review): the charset follows the SoX "trim" position syntax -
// widen it if a profile uses a tool with a different notation.
const SAFE_TRIM_POSITION = /^[0-9:.=+st-]+$/

// Returns a trim position as a string that is safe to append to the command
// line, or undefined if the value is not set (falsy). Throws on anything else.
const toTrimPosition = (name, value) => {
  if (!value) return undefined
  const isScalar = typeof value === 'string' || typeof value === 'number'
  if (!isScalar || !SAFE_TRIM_POSITION.test(String(value))) {
    throw new Error(`invalid value for "${name}": expected a time position (digits, ":" and "." only)`)
  }
  return String(value)
}

/**
 * Runs an audio conversion command line in a shell, feeding the input audio
 * through stdin and collecting the result from a temporary output file.
 *
 * @param {string} cmdLine Mustache template of the command line; the variable
 *   `output` is replaced with the path of the temporary output file.
 * @param {string} outputFile File name suffix for the temporary output file.
 * @param {object} options
 * @param {Buffer} options.inputBuffer Audio data written to the stdin of the command.
 * @param {string|number} [options.start] Appended as first argument of a trailing `trim`.
 * @param {string|number} [options.end] Appended as second argument of a trailing `trim`
 *   (with `0` as first argument if no start is given).
 * @returns {Promise<Buffer>} Resolves with the content of the output file. Rejects if
 *   start/end are not valid positions, if the process cannot be started or exits with
 *   a non-zero code, or if the output file cannot be read.
 */
const runconvert = (cmdLine, outputFile, { inputBuffer, start, end }) => {
  return new Promise((resolve, reject) => {
    // Validate first: these values usually originate from the HTTP query string
    // and end up in a string handed to "/bin/sh -c". A throw in here rejects the promise.
    const trimStart = toTrimPosition('start', start)
    const trimEnd = toTrimPosition('end', end)

    const output = `${process.env.BOTIUM_SPEECH_TMP_DIR || '/tmp'}/${uuidv1()}_${outputFile}`

    let cmdLineFull = Mustache.render(cmdLine, { output })
    if (trimStart && trimEnd) {
      cmdLineFull = `${cmdLineFull} trim ${trimStart} ${trimEnd}`
    } else if (trimStart) {
      cmdLineFull = `${cmdLineFull} trim ${trimStart}`
    } else if (trimEnd) {
      cmdLineFull = `${cmdLineFull} trim 0 ${trimEnd}`
    }
    debug(`cmdLineFull: ${cmdLineFull}`)
    const childProcess = spawn('/bin/sh', ['-c', cmdLineFull])

    // Best-effort removal of the temporary file; a failed cleanup must neither
    // hide the real error nor turn a successful conversion into a failure.
    const removeOutput = () => {
      try {
        fs.unlinkSync(output)
      } catch (err) {
        if (err.code !== 'ENOENT') {
          debug(`removing output file ${output} failed: ${err.message}`)
        }
      }
    }

    childProcess.once('exit', (code, signal) => {
      debug(`conversion process exited with code ${code}, signal ${signal}`)
      if (code !== 0) {
        // The command may have left a partial output file behind.
        removeOutput()
        return reject(new Error(`conversion process exited with code ${code}, signal ${signal}`))
      }
      try {
        resolve(fs.readFileSync(output))
      } catch (err) {
        reject(new Error(`conversion process output file ${output} not readable: ${err.message}`))
      } finally {
        removeOutput()
      }
    })
    childProcess.once('error', (err) => {
      debug(`conversion process failed: ${err.message}`)
      removeOutput()
      reject(new Error(`conversion process failed: ${err.message}`))
    })
    // Stream errors (e.g. EPIPE when the command exits before reading all of
    // stdin) are only logged; the outcome is decided by the exit code.
    childProcess.stdout.on('error', (err) => {
      debug('stdout err ' + err)
    })
    childProcess.stderr.on('error', (err) => {
      debug('stderr err ' + err)
    })
    childProcess.stdin.on('error', (err) => {
      debug('stdin err ' + err)
    })
    childProcess.stderr.on('data', (data) => {
      debug('stderr ' + data)
    })

    childProcess.stdin.write(inputBuffer)
    childProcess.stdin.end()
  })
}
57+
58+
// Public interface of the conversion module.
module.exports = { runconvert }

frontend/src/convert/sox.js

Lines changed: 0 additions & 61 deletions
This file was deleted.

frontend/src/routes.js

Lines changed: 28 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ const mkdirp = require('mkdirp')
44
const crypto = require('crypto')
55
const express = require('express')
66
const sanitize = require('sanitize-filename')
7-
const { runsox } = require('./convert/sox')
7+
const { runconvert } = require('./convert/convert')
88
const { wer } = require('./utils')
99
const debug = require('debug')('botium-speech-processing-routes')
1010

@@ -289,6 +289,27 @@ router.get('/api/tts/:language', async (req, res, next) => {
289289
}
290290
})
291291

292+
/**
293+
* @swagger
294+
* /api/convertprofiles:
295+
* get:
296+
* description: Get list of audio conversion profiles
297+
* security:
298+
* - ApiKeyAuth: []
299+
* produces:
300+
* - application/json
301+
* responses:
302+
* 200:
303+
* description: List of supported audio conversion profiles
304+
* schema:
305+
* type: array
306+
* items:
307+
* type: string
308+
*/
309+
// Lists the names of all conversion profiles configured through environment
// variables of the form BOTIUM_SPEECH_CONVERT_PROFILE_<NAME>_CMD. The name is
// cut out between the fixed prefix and suffix (instead of splitting on '_'),
// so that profile names containing underscores are returned in full.
// The handler is synchronous, so anything thrown in here is passed to the
// Express error handling instead of ending up as an unhandled rejection.
router.get('/api/convertprofiles', (req, res, next) => {
  const prefix = 'BOTIUM_SPEECH_CONVERT_PROFILE_'
  const suffix = '_CMD'
  const profiles = Object.keys(process.env)
    // The length check skips variables with an empty profile name.
    .filter(e => e.startsWith(prefix) && e.endsWith(suffix) && e.length > prefix.length + suffix.length)
    .map(e => e.slice(prefix.length, -suffix.length))
  res.json(profiles)
})
312+
292313
/**
293314
* @swagger
294315
* /api/convert/{profile}:
@@ -320,7 +341,7 @@ router.get('/api/tts/:language', async (req, res, next) => {
320341
* requestBody:
321342
* description: Audio file
322343
* content:
323-
* audio/*:
344+
* audio/wav:
324345
* schema:
325346
* type: string
326347
* format: binary
@@ -334,20 +355,21 @@ router.get('/api/tts/:language', async (req, res, next) => {
334355
* format: binary
335356
*/
336357
router.post('/api/convert/:profile', async (req, res, next) => {
358+
console.log(req.body)
337359
if (!Buffer.isBuffer(req.body)) {
338360
return next(new Error('req.body is not a buffer'))
339361
}
340-
const envVarSox = `BOTIUM_SPEECH_CONVERT_PROFILE_${req.params.profile.toUpperCase()}_SOX`
341-
if (!process.env[envVarSox]) {
342-
return next(new Error(`Environment variable ${envVarSox} empty`))
362+
const envVarCmd = `BOTIUM_SPEECH_CONVERT_PROFILE_${req.params.profile.toUpperCase()}_CMD`
363+
if (!process.env[envVarCmd]) {
364+
return next(new Error(`Environment variable ${envVarCmd} empty`))
343365
}
344366
const envVarOutput = `BOTIUM_SPEECH_CONVERT_PROFILE_${req.params.profile.toUpperCase()}_OUTPUT`
345367
if (!process.env[envVarOutput]) {
346368
return next(new Error(`Environment variable ${envVarOutput} empty`))
347369
}
348370

349371
try {
350-
const outputBuffer = await runsox(process.env[envVarSox], { inputBuffer: req.body, start: req.query.start, end: req.query.end })
372+
const outputBuffer = await runconvert(process.env[envVarCmd], process.env[envVarOutput], { inputBuffer: req.body, start: req.query.start, end: req.query.end })
351373
res.writeHead(200, {
352374
'Content-disposition': `attachment; filename="${process.env[envVarOutput]}"`,
353375
'Content-Length': outputBuffer.length

frontend/src/swagger.json

Lines changed: 25 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -229,6 +229,30 @@
229229
}
230230
}
231231
},
232+
"/api/convertprofiles": {
233+
"get": {
234+
"description": "Get list of audio conversion profiles",
235+
"security": [
236+
{
237+
"ApiKeyAuth": []
238+
}
239+
],
240+
"produces": [
241+
"application/json"
242+
],
243+
"responses": {
244+
"200": {
245+
"description": "List of supported audio conversion profiles",
246+
"schema": {
247+
"type": "array",
248+
"items": {
249+
"type": "string"
250+
}
251+
}
252+
}
253+
}
254+
}
255+
},
232256
"/api/convert/{profile}": {
233257
"post": {
234258
"description": "Convert audio file",
@@ -272,7 +296,7 @@
272296
"requestBody": {
273297
"description": "Audio file",
274298
"content": {
275-
"audio/*": {
299+
"audio/wav": {
276300
"schema": {
277301
"type": "string",
278302
"format": "binary"

0 commit comments

Comments
 (0)