Merge pull request #17 from codeforequity-at/develop

Botium · web-flow · commit 0853cbf8cb4c · 2021-01-08T16:45:16.000+01:00
1.0.2
diff --git a/frontend/.gitignore b/frontend/.gitignore
@@ -2,4 +2,5 @@ node_modules
 package-lock.json
 *.local
 resources/.cache
+resources/.tmp
 resources/google.json
diff --git a/frontend/resources/.env b/frontend/resources/.env
@@ -40,8 +40,41 @@ BOTIUM_SPEECH_KALDI_URL_DE=http://stt_de:80/client/dynamic/recognize
 
 # WAV Conversation Command Line
 BOTIUM_SPEECH_CONVERT_PROFILE_WAVTOMONOWAV_CMD=sox -t wav - -r 16k -t wav -c 1 -b 16 -e signed {{{output}}}
+BOTIUM_SPEECH_CONVERT_PROFILE_WAVTOMONOWAV_DESC=Converts WAV file to a Mono Wav, 16khz, 16bit
 BOTIUM_SPEECH_CONVERT_PROFILE_WAVTOMONOWAV_OUTPUT=output.wav
 BOTIUM_SPEECH_CONVERT_PROFILE_MP3TOMONOWAV_CMD=sox -t mp3 - -r 16k -t wav -c 1 -b 16 -e signed {{{output}}}
+BOTIUM_SPEECH_CONVERT_PROFILE_MP3TOMONOWAV_DESC=Converts MP3 file to a Mono Wav, 16khz, 16bit
 BOTIUM_SPEECH_CONVERT_PROFILE_MP3TOMONOWAV_OUTPUT=output.wav
 BOTIUM_SPEECH_CONVERT_PROFILE_WEBMTOMONOWAV_CMD=ffmpeg -i - -f wav - | sox -t wav - -r 16k -t wav -c 1 -b 16 -e signed {{{output}}}
+BOTIUM_SPEECH_CONVERT_PROFILE_WEBMTOMONOWAV_DESC=Converts WEBM file to a Mono Wav, 16khz, 16bit
 BOTIUM_SPEECH_CONVERT_PROFILE_WEBMTOMONOWAV_OUTPUT=output.wav
+
+# Effects Command Lines
+BOTIUM_SPEECH_CONVERT_PROFILE_GSM_CMD=sox -t {{{inputtype}}} - -t {{{inputtype}}} -r 8k -c 1 -b 8 -e signed {{{output}}} lowpass 2000 highpass 500
+BOTIUM_SPEECH_CONVERT_PROFILE_GSM_DESC=Makes audio sound like a GSM phone call
+BOTIUM_SPEECH_CONVERT_PROFILE_ADDLOWNOISE_CMD=sox -t {{{inputtype}}} {{{input}}} -p synth brownnoise vol 0.1 | sox -m {{{input}}} - {{{output}}}
+BOTIUM_SPEECH_CONVERT_PROFILE_ADDLOWNOISE_DESC=Adds low volume background noise
+BOTIUM_SPEECH_CONVERT_PROFILE_ADDHIGHNOISE_CMD=sox -t {{{inputtype}}} {{{input}}} -p synth brownnoise vol 0.3 | sox -m {{{input}}} - {{{output}}}
+BOTIUM_SPEECH_CONVERT_PROFILE_ADDHIGHNOISE_DESC=Adds high volume background noise
+BOTIUM_SPEECH_CONVERT_PROFILE_ADDHIGHESTNOISE_CMD=sox -t {{{inputtype}}} {{{input}}} -p synth brownnoise vol 0.5 | sox -m {{{input}}} - {{{output}}}
+BOTIUM_SPEECH_CONVERT_PROFILE_ADDHIGHESTNOISE_DESC=Adds killer background noise
+BOTIUM_SPEECH_CONVERT_PROFILE_LOWESTVOL_CMD=sox -t {{{inputtype}}} - -t {{{inputtype}}} {{{output}}} vol 0.1
+BOTIUM_SPEECH_CONVERT_PROFILE_LOWESTVOL_DESC=Minimizes the volume
+BOTIUM_SPEECH_CONVERT_PROFILE_LOWVOL_CMD=sox -t {{{inputtype}}} - -t {{{inputtype}}} {{{output}}} vol 0.5
+BOTIUM_SPEECH_CONVERT_PROFILE_LOWVOL_DESC=Turns down the volume
+BOTIUM_SPEECH_CONVERT_PROFILE_HIGHVOL_CMD=sox -t {{{inputtype}}} - -t {{{inputtype}}} {{{output}}} vol 2
+BOTIUM_SPEECH_CONVERT_PROFILE_HIGHVOL_DESC=Turns up the volume
+BOTIUM_SPEECH_CONVERT_PROFILE_HIGHESTVOL_CMD=sox -t {{{inputtype}}} - -t {{{inputtype}}} {{{output}}} vol 4
+BOTIUM_SPEECH_CONVERT_PROFILE_HIGHESTVOL_DESC=Maximizes the volume
+BOTIUM_SPEECH_CONVERT_PROFILE_BREAKX1_CMD=sox -t {{{inputtype}}} - -t {{{inputtype}}} {{{output}}} pad 0.3@1.0
+BOTIUM_SPEECH_CONVERT_PROFILE_BREAKX1_DESC=Adds one artificial silence break
+BOTIUM_SPEECH_CONVERT_PROFILE_BREAKX2_CMD=sox -t {{{inputtype}}} - -t {{{inputtype}}} {{{output}}} pad 0.3@1.0 0.3@2.0
+BOTIUM_SPEECH_CONVERT_PROFILE_BREAKX2_DESC=Adds two artificial silence breaks
+BOTIUM_SPEECH_CONVERT_PROFILE_BREAKX3_CMD=sox -t {{{inputtype}}} - -t {{{inputtype}}} {{{output}}} pad 0.3@1.0 0.3@2.0 0.3@3.0
+BOTIUM_SPEECH_CONVERT_PROFILE_BREAKX3_DESC=Adds three artificial silence breaks
+BOTIUM_SPEECH_CONVERT_PROFILE_SILENCEX1_CMD=sox -t {{{inputtype}}} - -t {{{inputtype}}} {{{output}}} pad 0.3@1.0 trim 0 1.3 0.3
+BOTIUM_SPEECH_CONVERT_PROFILE_SILENCEX1_DESC=Replaces one short section with silence
+BOTIUM_SPEECH_CONVERT_PROFILE_SILENCEX2_CMD=sox -t {{{inputtype}}} - -t {{{inputtype}}} {{{output}}} pad 0.3@1.0 0.3@2.0 trim 0 1.3 0.3 trim 0 2.3 0.3
+BOTIUM_SPEECH_CONVERT_PROFILE_SILENCEX2_DESC=Replaces two short sections with silence
+BOTIUM_SPEECH_CONVERT_PROFILE_SILENCEX3_CMD=sox -t {{{inputtype}}} - -t {{{inputtype}}} {{{output}}} pad 0.3@1.0 0.3@2.0 0.3@3.0 trim 0 1.3 0.3 trim 0 2.3 0.3 trim 0 3.3 0.3
+BOTIUM_SPEECH_CONVERT_PROFILE_SILENCEX3_DESC=Replaces three short sections with silence
diff --git a/frontend/src/convert/convert.js b/frontend/src/convert/convert.js
@@ -1,22 +1,75 @@
 const fs = require('fs')
 const Mustache = require('mustache')
-const { spawn } = require('child_process')
+const { spawn, exec } = require('child_process')
 const { v1: uuidv1 } = require('uuid')
 const debug = require('debug')('botium-speech-processing-convert')
 
-const runconvert = (cmdLine, outputFile, { inputBuffer, start, end }) => {
+const _getSoxFileType = (filename) => {
   return new Promise((resolve, reject) => {
-    const output = `${process.env.BOTIUM_SPEECH_TMP_DIR || '/tmp'}/${uuidv1()}_${outputFile}`
-
-    let cmdLineFull = Mustache.render(cmdLine, { output })
-    if (start && end) {
-      cmdLineFull = `${cmdLineFull} trim ${start} ${end}`
-    } else if (start && !end) {
-      cmdLineFull = `${cmdLineFull} trim ${start}`
-    } else if (!start && end) {
-      cmdLineFull = `${cmdLineFull} trim 0 ${end}`
+    exec(`soxi -t ${filename}`, (err, stdout, stderr) => {
+      if (err) return reject(err)
+      if (stderr) return reject(stderr.trim())
+      resolve(stdout.trim())
+    })
+  })
+}
+
+const _isMP3 = (buf) => {
+  if (!buf || buf.length < 3) {
+    return false
+  }
+  return (buf[0] === 73 &&
+    buf[1] === 68 &&
+    buf[2] === 51) || (
+    buf[0] === 255 &&
+      (buf[1] === 251 || buf[1] === 250)
+  )
+}
+
+const runconvert = async (cmdLine, outputName, { inputBuffer, start, end }) => {
+  const jobId = uuidv1()
+
+  const writeInput = !outputName || cmdLine.indexOf('{{{input}}}') >= 0 || cmdLine.indexOf('{{{inputtype}}}') >= 0
+
+  let input = null
+  let inputtype = null
+
+  if (writeInput) {
+    input = `${process.env.BOTIUM_SPEECH_TMP_DIR || '/tmp'}/${jobId}_input`
+    try {
+      fs.writeFileSync(input, inputBuffer)
+    } catch (err) {
+      debug(`conversion process input file ${input} not writable: ${err.message}`)
+      throw new Error('conversion process input file not writable')
+    }
+    if (_isMP3(inputBuffer)) {
+      inputtype = 'mp3'
+    } else {
+      try {
+        inputtype = await _getSoxFileType(input)
+        debug(`Identified input type: ${inputtype}`)
+      } catch (err) {
+        debug(`identification of input file type ${input} failed: ${err.message}`)
+        throw new Error('identification of input file type failed')
+      }
+    }
+    if (!outputName) {
+      outputName = `output.${inputtype}`
     }
-    debug(`cmdLineFull: ${cmdLineFull}`)
+  }
+  const output = `${process.env.BOTIUM_SPEECH_TMP_DIR || '/tmp'}/${jobId}_${outputName}`
+
+  let cmdLineFull = Mustache.render(cmdLine, { output, input, inputtype })
+  if (start && end) {
+    cmdLineFull = `${cmdLineFull} trim ${start} ${end}`
+  } else if (start && !end) {
+    cmdLineFull = `${cmdLineFull} trim ${start}`
+  } else if (!start && end) {
+    cmdLineFull = `${cmdLineFull} trim 0 ${end}`
+  }
+  debug(`cmdLineFull: ${cmdLineFull}`)
+
+  return new Promise((resolve, reject) => {
     const childProcess = spawn('/bin/sh', ['-c', cmdLineFull])
 
     childProcess.once('exit', (code, signal) => {
@@ -25,13 +78,23 @@ const runconvert = (cmdLine, outputFile, { inputBuffer, start, end }) => {
         try {
           const outputBuffer = fs.readFileSync(output)
           fs.unlinkSync(output)
-          resolve(outputBuffer)
+          resolve({
+            outputName,
+            outputBuffer
+          })
         } catch (err) {
           reject(new Error(`conversion process output file ${output} not readable: ${err.message}`))
         }
       } else {
         reject(new Error(`conversion process exited with code ${code}, signal ${signal}`))
       }
+      if (input) {
+        try {
+          fs.unlinkSync(input)
+        } catch (err) {
+          debug(`conversion process input file ${input} not deleted: ${err.message}`)
+        }
+      }
     })
     childProcess.once('error', (err) => {
       debug(`conversion process failed: ${err.message}`)
@@ -50,7 +113,9 @@ const runconvert = (cmdLine, outputFile, { inputBuffer, start, end }) => {
       debug('stderr ' + data)
     })
 
-    childProcess.stdin.write(inputBuffer)
+    if (cmdLine.indexOf('{{{input}}}') < 0) {
+      childProcess.stdin.write(inputBuffer)
+    }
     childProcess.stdin.end()
   })
 }
diff --git a/frontend/src/routes.js b/frontend/src/routes.js
@@ -372,10 +372,19 @@ router.get('/api/tts/:language', async (req, res, next) => {
  *         schema:
  *           type: array
  *           items:
- *             type: string
+ *             type: object
+ *             properties:
+ *               name:
+ *                 type: string
+ *               description:
+ *                 type: string
  */
 router.get('/api/convertprofiles', async (req, res, next) => {
-  res.json(Object.keys(process.env).filter(e => e.startsWith('BOTIUM_SPEECH_CONVERT_PROFILE_') && e.endsWith('_CMD')).map(e => e.split('_')[4]))
+  const keys = Object.keys(process.env).filter(e => e.startsWith('BOTIUM_SPEECH_CONVERT_PROFILE_') && e.endsWith('_CMD')).map(e => e.split('_')[4])
+  return res.json(keys.map(key => ({
+    name: key,
+    description: process.env[`BOTIUM_SPEECH_CONVERT_PROFILE_${key}_DESC`] || ''
+  })))
 })
 
 /**
@@ -423,7 +432,6 @@ router.get('/api/convertprofiles', async (req, res, next) => {
  *               format: binary
  */
 router.post('/api/convert/:profile', async (req, res, next) => {
-  console.log(req.body)
   if (!Buffer.isBuffer(req.body)) {
     return next(new Error('req.body is not a buffer'))
   }
@@ -432,14 +440,11 @@ router.post('/api/convert/:profile', async (req, res, next) => {
     return next(new Error(`Environment variable ${envVarCmd} empty`))
   }
   const envVarOutput = `BOTIUM_SPEECH_CONVERT_PROFILE_${req.params.profile.toUpperCase()}_OUTPUT`
-  if (!process.env[envVarOutput]) {
-    return next(new Error(`Environment variable ${envVarOutput} empty`))
-  }
 
   try {
-    const outputBuffer = await runconvert(process.env[envVarCmd], process.env[envVarOutput], { inputBuffer: req.body, start: req.query.start, end: req.query.end })
+    const { outputName, outputBuffer } = await runconvert(process.env[envVarCmd], process.env[envVarOutput], { inputBuffer: req.body, start: req.query.start, end: req.query.end })
     res.writeHead(200, {
-      'Content-disposition': `attachment; filename="${process.env[envVarOutput]}"`,
+      'Content-disposition': `attachment; filename="${outputName}"`,
       'Content-Length': outputBuffer.length
     })
     res.end(outputBuffer)
diff --git a/frontend/src/swagger.json b/frontend/src/swagger.json
@@ -2,7 +2,7 @@
   "openapi": "3.0.0",
   "info": {
     "title": "Botium Speech Processing API",
-    "version": "1.0.1",
+    "version": "1.0.2",
     "description": "Botium Speech Processing API"
   },
   "basePath": "/",
@@ -325,7 +325,15 @@
             "schema": {
               "type": "array",
               "items": {
-                "type": "string"
+                "type": "object",
+                "properties": {
+                  "name": {
+                    "type": "string"
+                  },
+                  "description": {
+                    "type": "string"
+                  }
+                }
               }
             }
           }
diff --git a/frontend/src/swaggerDef.json b/frontend/src/swaggerDef.json
@@ -2,7 +2,7 @@
     "openapi": "3.0.0",
     "info": {
       "title": "Botium Speech Processing API",
-      "version": "1.0.1",
+      "version": "1.0.2",
       "description": "Botium Speech Processing API"
     },
     "basePath": "/"