BOT-1862 added support for webm

Florian Treml · Florian Treml · commit 570097c36a83 · 2020-12-18T15:44:24.000+01:00
diff --git a/docker-compose-dev.yml b/docker-compose-dev.yml
@@ -22,30 +22,30 @@ services:
 #    volumes:
 #      - "./watcher:/app/watch"
 #      - "./logs/watcher:/app/logs"
-#  stt_en:
-#    build:
-#      context: stt
-#      dockerfile: Dockerfile.kaldi.en
-#    image: botium/botium-speech-kaldi-en:develop
-#    restart: always
-#    volumes:
-#      - "./logs/stt_en:/opt/logs"
-#  stt_de:
-#    build:
-#      context: stt
-#      dockerfile: Dockerfile.kaldi.de
-#    image: botium/botium-speech-kaldi-de:develop
-#    restart: always
-#    volumes:
-#      - "./logs/stt_de:/opt/logs"
-  tts:
+  stt_en:
     build:
-      context: tts
-      dockerfile: Dockerfile.marytts
-    image: botium/botium-speech-marytts:develop
+      context: stt
+      dockerfile: Dockerfile.kaldi.en
+    image: botium/botium-speech-kaldi-en:develop
     restart: always
-#  dictate:
+    volumes:
+      - "./logs/stt_en:/opt/logs"
+  stt_de:
+    build:
+      context: stt
+      dockerfile: Dockerfile.kaldi.de
+    image: botium/botium-speech-kaldi-de:develop
+    restart: always
+    volumes:
+      - "./logs/stt_de:/opt/logs"
+#  tts:
 #    build:
-#      context: dictate
-#    image: botium/botium-speech-dictate:develop
+#      context: tts
+#      dockerfile: Dockerfile.marytts
+#    image: botium/botium-speech-marytts:develop
 #    restart: always
+  dictate:
+    build:
+      context: dictate
+    image: botium/botium-speech-dictate:develop
+    restart: always
diff --git a/frontend/Dockerfile b/frontend/Dockerfile
@@ -1,7 +1,7 @@
 FROM ubuntu:18.04
 
 RUN apt-get update && apt-get -y install curl gnupg && curl -sL https://deb.nodesource.com/setup_14.x  | bash - && apt-get -y install nodejs
-RUN apt-get install -y dos2unix sox libsox-fmt-mp3 libttspico-utils
+RUN apt-get install -y dos2unix sox libsox-fmt-mp3 libttspico-utils ffmpeg
 
 WORKDIR /app
 COPY ./package.json /app/package.json
diff --git a/frontend/resources/.env b/frontend/resources/.env
@@ -39,7 +39,9 @@ BOTIUM_SPEECH_GOOGLE_CONFIG={}
 #BOTIUM_SPEECH_GOOGLE_API_VERSION=
 
 # WAV Conversation Command Line
-BOTIUM_SPEECH_CONVERT_PROFILE_WAVTOMONOWAV_SOX=sox -t wav - -r 16k -t wav -c 1 -b 16 -e signed {{{output}}}
+BOTIUM_SPEECH_CONVERT_PROFILE_WAVTOMONOWAV_CMD=sox -t wav - -r 16k -t wav -c 1 -b 16 -e signed {{{output}}}
 BOTIUM_SPEECH_CONVERT_PROFILE_WAVTOMONOWAV_OUTPUT=output.wav
-BOTIUM_SPEECH_CONVERT_PROFILE_MP3TOMONOWAV_SOX=sox -t mp3 - -r 16k -t wav -c 1 -b 16 -e signed {{{output}}}
-BOTIUM_SPEECH_CONVERT_PROFILE_MP3TOMONOWAV_OUTPUT=output.wav
+BOTIUM_SPEECH_CONVERT_PROFILE_MP3TOMONOWAV_CMD=sox -t mp3 - -r 16k -t wav -c 1 -b 16 -e signed {{{output}}}
+BOTIUM_SPEECH_CONVERT_PROFILE_MP3TOMONOWAV_OUTPUT=output.wav
+BOTIUM_SPEECH_CONVERT_PROFILE_WEBMTOMONOWAV_CMD=ffmpeg -i - -f wav - | sox -t wav - -r 16k -t wav -c 1 -b 16 -e signed {{{output}}}
+BOTIUM_SPEECH_CONVERT_PROFILE_WEBMTOMONOWAV_OUTPUT=output.wav
diff --git a/frontend/src/convert/convert.js b/frontend/src/convert/convert.js
@@ -0,0 +1,60 @@
+const fs = require('fs')
+const Mustache = require('mustache')
+const { spawn } = require('child_process')
+const { v1: uuidv1 } = require('uuid')
+const debug = require('debug')('botium-speech-processing-convert')
+
+/**
+ * Runs a shell conversion command on an audio buffer and resolves with the converted file content.
+ * @param {string} cmdLine Mustache template of the command line; {{{output}}} is replaced by the output path
+ * @param {string} outputFile file name appended to a unique prefix to build the temporary output path
+ * @param {{inputBuffer: Buffer, start?: string, end?: string}} options audio written to stdin, optional trim positions
+ * @returns {Promise<Buffer>} content of the output file; rejects on invalid trim position or failed conversion
+ */
+const runconvert = (cmdLine, outputFile, { inputBuffer, start, end }) => {
+  return new Promise((resolve, reject) => {
+    // start/end are caller-supplied and end up in a shell command line - accept time/sample positions only
+    const invalid = [start, end].find(p => p && !/^[=+-]?[0-9:.]+[st]?$/.test(String(p)))
+    if (invalid) return reject(new Error(`invalid trim position ${invalid}`))
+
+    const output = `${process.env.BOTIUM_SPEECH_TMP_DIR || '/tmp'}/${uuidv1()}_${outputFile}`
+    // best-effort removal of the temporary file, it may not exist if the command failed early
+    const cleanup = () => { try { fs.unlinkSync(output) } catch (err) { debug(`cleanup of ${output} failed: ${err.message}`) } }
+
+    let cmdLineFull = Mustache.render(cmdLine, { output })
+    // the trim effect is appended to the end of the rendered command line
+    if (start || end) cmdLineFull = `${cmdLineFull} trim ${start || 0}${end ? ` ${end}` : ''}`
+    debug(`cmdLineFull: ${cmdLineFull}`)
+    const childProcess = spawn('/bin/sh', ['-c', cmdLineFull])
+
+    childProcess.once('exit', (code, signal) => {
+      debug(`conversion process exited with code ${code}, signal ${signal}`)
+      if (code !== 0) {
+        cleanup()
+        return reject(new Error(`conversion process exited with code ${code}, signal ${signal}`))
+      }
+      try {
+        resolve(fs.readFileSync(output))
+      } catch (err) {
+        reject(new Error(`conversion process output file ${output} not readable: ${err.message}`))
+      } finally {
+        cleanup()
+      }
+    })
+    childProcess.once('error', (err) => {
+      debug(`conversion process failed: ${err.message}`)
+      reject(new Error(`conversion process failed: ${err.message}`))
+    })
+    for (const name of ['stdout', 'stderr', 'stdin']) {
+      childProcess[name].on('error', (err) => debug(`${name} err ${err}`))
+    }
+    childProcess.stderr.on('data', (data) => debug('stderr ' + data))
+
+    childProcess.stdin.write(inputBuffer)
+    childProcess.stdin.end()
+  })
+}
+
+module.exports = {
+  runconvert
+}
diff --git a/frontend/src/convert/sox.js b/frontend/src/convert/sox.js
diff --git a/frontend/src/routes.js b/frontend/src/routes.js
@@ -4,7 +4,7 @@ const mkdirp = require('mkdirp')
 const crypto = require('crypto')
 const express = require('express')
 const sanitize = require('sanitize-filename')
-const { runsox } = require('./convert/sox')
+const { runconvert } = require('./convert/convert')
 const { wer } = require('./utils')
 const debug = require('debug')('botium-speech-processing-routes')
 
@@ -289,6 +289,27 @@ router.get('/api/tts/:language', async (req, res, next) => {
   }
 })
 
+/**
+ * @swagger
+ * /api/convertprofiles:
+ *   get:
+ *     description: Get list of audio conversion profiles
+ *     security:
+ *       - ApiKeyAuth: []
+ *     produces:
+ *       - application/json
+ *     responses:
+ *       200:
+ *         description: List of supported audio conversion profiles
+ *         schema:
+ *           type: array
+ *           items:
+ *             type: string
+ */
+router.get('/api/convertprofiles', async (req, res, next) => { // lists profile names taken from the *_CMD environment variables
+  res.json(Object.keys(process.env).map(e => /^BOTIUM_SPEECH_CONVERT_PROFILE_(.+)_CMD$/.exec(e)).filter(Boolean).map(m => m[1])) // (.+) keeps names containing '_' intact
+})
+
 /**
  * @swagger
  * /api/convert/{profile}:
@@ -320,7 +341,7 @@ router.get('/api/tts/:language', async (req, res, next) => {
  *     requestBody:
  *       description: Audio file
  *       content:
- *         audio/*:
+ *         audio/wav:
  *           schema:
  *             type: string
  *             format: binary
@@ -334,20 +355,21 @@ router.get('/api/tts/:language', async (req, res, next) => {
  *               format: binary
  */
 router.post('/api/convert/:profile', async (req, res, next) => {
+  console.log(req.body)
   if (!Buffer.isBuffer(req.body)) {
     return next(new Error('req.body is not a buffer'))
   }
-  const envVarSox = `BOTIUM_SPEECH_CONVERT_PROFILE_${req.params.profile.toUpperCase()}_SOX`
-  if (!process.env[envVarSox]) {
-    return next(new Error(`Environment variable ${envVarSox} empty`))
+  const envVarCmd = `BOTIUM_SPEECH_CONVERT_PROFILE_${req.params.profile.toUpperCase()}_CMD`
+  if (!process.env[envVarCmd]) {
+    return next(new Error(`Environment variable ${envVarCmd} empty`))
   }
   const envVarOutput = `BOTIUM_SPEECH_CONVERT_PROFILE_${req.params.profile.toUpperCase()}_OUTPUT`
   if (!process.env[envVarOutput]) {
     return next(new Error(`Environment variable ${envVarOutput} empty`))
   }
 
   try {
-    const outputBuffer = await runsox(process.env[envVarSox], { inputBuffer: req.body, start: req.query.start, end: req.query.end })
+    const outputBuffer = await runconvert(process.env[envVarCmd], process.env[envVarOutput], { inputBuffer: req.body, start: req.query.start, end: req.query.end })
     res.writeHead(200, {
       'Content-disposition': `attachment; filename="${process.env[envVarOutput]}"`,
       'Content-Length': outputBuffer.length
diff --git a/frontend/src/swagger.json b/frontend/src/swagger.json
@@ -229,6 +229,30 @@
         }
       }
     },
+    "/api/convertprofiles": {
+      "get": {
+        "description": "Get list of audio conversion profiles",
+        "security": [
+          {
+            "ApiKeyAuth": []
+          }
+        ],
+        "produces": [
+          "application/json"
+        ],
+        "responses": {
+          "200": {
+            "description": "List of supported audio conversion profiles",
+            "schema": {
+              "type": "array",
+              "items": {
+                "type": "string"
+              }
+            }
+          }
+        }
+      }
+    },
     "/api/convert/{profile}": {
       "post": {
         "description": "Convert audio file",
@@ -272,7 +296,7 @@
         "requestBody": {
           "description": "Audio file",
           "content": {
-            "audio/*": {
+            "audio/wav": {
               "schema": {
                 "type": "string",
                 "format": "binary"