BOT-2014 additional conversion profiles for audio perturbation

Florian Treml · Florian Treml · commit c3526f34196c · 2021-01-04T19:15:28.000+01:00
diff --git a/frontend/.gitignore b/frontend/.gitignore
@@ -2,4 +2,5 @@ node_modules
 package-lock.json
 *.local
 resources/.cache
+resources/.tmp
 resources/google.json
diff --git a/frontend/resources/.env b/frontend/resources/.env
@@ -40,8 +40,32 @@ BOTIUM_SPEECH_KALDI_URL_DE=http://stt_de:80/client/dynamic/recognize
 
 # WAV Conversation Command Line
 BOTIUM_SPEECH_CONVERT_PROFILE_WAVTOMONOWAV_CMD=sox -t wav - -r 16k -t wav -c 1 -b 16 -e signed {{{output}}}
-BOTIUM_SPEECH_CONVERT_PROFILE_WAVTOMONOWAV_OUTPUT=output.wav
+BOTIUM_SPEECH_CONVERT_PROFILE_WAVTOMONOWAV_DESC=Converts WAV file to a Mono Wav, 16khz, 16bit
 BOTIUM_SPEECH_CONVERT_PROFILE_MP3TOMONOWAV_CMD=sox -t mp3 - -r 16k -t wav -c 1 -b 16 -e signed {{{output}}}
-BOTIUM_SPEECH_CONVERT_PROFILE_MP3TOMONOWAV_OUTPUT=output.wav
+BOTIUM_SPEECH_CONVERT_PROFILE_MP3TOMONOWAV_DESC=Converts MP3 file to a Mono Wav, 16khz, 16bit
 BOTIUM_SPEECH_CONVERT_PROFILE_WEBMTOMONOWAV_CMD=ffmpeg -i - -f wav - | sox -t wav - -r 16k -t wav -c 1 -b 16 -e signed {{{output}}}
-BOTIUM_SPEECH_CONVERT_PROFILE_WEBMTOMONOWAV_OUTPUT=output.wav
+BOTIUM_SPEECH_CONVERT_PROFILE_WEBMTOMONOWAV_DESC=Converts WEBM file to a Mono Wav, 16khz, 16bit
+
+# Effects Command Lines
+BOTIUM_SPEECH_CONVERT_PROFILE_WAVTOGSM_CMD=sox -t wav - -t wav -r 8k -c 1 -b 8 -e signed {{{output}}} lowpass 2000 highpass 500
+BOTIUM_SPEECH_CONVERT_PROFILE_WAVTOGSM_DESC=Makes WAV file sound like a GSM phone call
+BOTIUM_SPEECH_CONVERT_PROFILE_WAVADDLOWNOISE_CMD=sox -t wav {{{input}}} -p synth brownnoise vol 0.1 | sox -m {{{input}}} - {{{output}}}
+BOTIUM_SPEECH_CONVERT_PROFILE_WAVADDLOWNOISE_DESC=Adds low volume background noise to a WAV file
+BOTIUM_SPEECH_CONVERT_PROFILE_WAVADDHIGHNOISE_CMD=sox -t wav {{{input}}} -p synth brownnoise vol 0.3 | sox -m {{{input}}} - {{{output}}}
+BOTIUM_SPEECH_CONVERT_PROFILE_WAVADDHIGHNOISE_DESC=Adds high volume background noise to a WAV file
+BOTIUM_SPEECH_CONVERT_PROFILE_WAVADDHIGHESTNOISE_CMD=sox -t wav {{{input}}} -p synth brownnoise vol 0.5 | sox -m {{{input}}} - {{{output}}}
+BOTIUM_SPEECH_CONVERT_PROFILE_WAVADDHIGHESTNOISE_DESC=Adds very high volume background noise to a WAV file
+BOTIUM_SPEECH_CONVERT_PROFILE_WAVLOWESTVOL_CMD=sox -t wav - -t wav {{{output}}} vol 0.1
+BOTIUM_SPEECH_CONVERT_PROFILE_WAVLOWESTVOL_DESC=Minimizes the volume of a WAV file
+BOTIUM_SPEECH_CONVERT_PROFILE_WAVLOWVOL_CMD=sox -t wav - -t wav {{{output}}} vol 0.5
+BOTIUM_SPEECH_CONVERT_PROFILE_WAVLOWVOL_DESC=Turns down the volume of a WAV file
+BOTIUM_SPEECH_CONVERT_PROFILE_WAVHIGHVOL_CMD=sox -t wav - -t wav {{{output}}} vol 2
+BOTIUM_SPEECH_CONVERT_PROFILE_WAVHIGHVOL_DESC=Turns up the volume of a WAV file
+BOTIUM_SPEECH_CONVERT_PROFILE_WAVHIGHESTVOL_CMD=sox -t wav - -t wav {{{output}}} vol 4
+BOTIUM_SPEECH_CONVERT_PROFILE_WAVHIGHESTVOL_DESC=Maximizes the volume of a WAV file
+BOTIUM_SPEECH_CONVERT_PROFILE_WAVBREAKX1_CMD=sox -t wav - -t wav {{{output}}} pad 0.5@1.0
+BOTIUM_SPEECH_CONVERT_PROFILE_WAVBREAKX1_DESC=Adds one artificial silence break into WAV file
+BOTIUM_SPEECH_CONVERT_PROFILE_WAVBREAKX2_CMD=sox -t wav - -t wav {{{output}}} pad 0.3@1.0 0.3@2.0
+BOTIUM_SPEECH_CONVERT_PROFILE_WAVBREAKX2_DESC=Adds two artificial silence breaks into WAV file
+BOTIUM_SPEECH_CONVERT_PROFILE_WAVBREAKX3_CMD=sox -t wav - -t wav {{{output}}} pad 0.3@1.0 0.3@2.0 0.3@3.0
+BOTIUM_SPEECH_CONVERT_PROFILE_WAVBREAKX3_DESC=Adds three artificial silence breaks into WAV file
diff --git a/frontend/src/convert/convert.js b/frontend/src/convert/convert.js
@@ -6,9 +6,20 @@ const debug = require('debug')('botium-speech-processing-convert')
 
 const runconvert = (cmdLine, outputFile, { inputBuffer, start, end }) => {
   return new Promise((resolve, reject) => {
-    const output = `${process.env.BOTIUM_SPEECH_TMP_DIR || '/tmp'}/${uuidv1()}_${outputFile}`
+    const jobId = uuidv1()
 
-    let cmdLineFull = Mustache.render(cmdLine, { output })
+    const output = `${process.env.BOTIUM_SPEECH_TMP_DIR || '/tmp'}/${jobId}_${outputFile}`
+    const input = cmdLine.indexOf('{{{input}}}') >= 0 ? `${process.env.BOTIUM_SPEECH_TMP_DIR || '/tmp'}/${jobId}_input` : null
+
+    if (input) { // the command line references {{{input}}}, so the payload must exist as a file before the command runs
+      try {
+        fs.writeFileSync(input, inputBuffer)
+      } catch (err) {
+        return reject(new Error(`conversion process input file ${input} not writable: ${err.message}`)) // stop here: without the input file the conversion cannot succeed
+      }
+    }
+
+    let cmdLineFull = Mustache.render(cmdLine, { output, input })
     if (start && end) {
       cmdLineFull = `${cmdLineFull} trim ${start} ${end}`
     } else if (start && !end) {
@@ -32,6 +43,13 @@ const runconvert = (cmdLine, outputFile, { inputBuffer, start, end }) => {
       } else {
         reject(new Error(`conversion process exited with code ${code}, signal ${signal}`))
       }
+      if (input) {
+        try {
+          fs.unlinkSync(input)
+        } catch (err) {
+          debug(`conversion process input file ${input} not deleted: ${err.message}`)
+        }
+      }
     })
     childProcess.once('error', (err) => {
       debug(`conversion process failed: ${err.message}`)
@@ -50,7 +68,9 @@ const runconvert = (cmdLine, outputFile, { inputBuffer, start, end }) => {
       debug('stderr ' + data)
     })
 
-    childProcess.stdin.write(inputBuffer)
+    if (!input) {
+      childProcess.stdin.write(inputBuffer)
+    }
     childProcess.stdin.end()
   })
 }
diff --git a/frontend/src/routes.js b/frontend/src/routes.js
@@ -372,10 +372,19 @@ router.get('/api/tts/:language', async (req, res, next) => {
  *         schema:
  *           type: array
  *           items:
- *             type: string
+ *             type: object
+ *             properties:
+ *               name:
+ *                 type: string
+ *               description:
+ *                 type: string
  */
 router.get('/api/convertprofiles', async (req, res, next) => {
-  res.json(Object.keys(process.env).filter(e => e.startsWith('BOTIUM_SPEECH_CONVERT_PROFILE_') && e.endsWith('_CMD')).map(e => e.split('_')[4]))
+  // List every profile that has a ..._CMD variable. The name is the key minus prefix and suffix (not split('_')[4]), so names containing '_' are not truncated.
+  const prefix = 'BOTIUM_SPEECH_CONVERT_PROFILE_'
+  const names = Object.keys(process.env).filter(e => e.startsWith(prefix) && e.endsWith('_CMD') && e.length > prefix.length + 4).map(e => e.slice(prefix.length, -4))
+  // The ..._DESC variable is optional; a profile without one is reported with an empty description.
+  return res.json(names.map(name => ({ name, description: process.env[`${prefix}${name}_DESC`] || '' })))
 })
 
 /**
@@ -423,7 +432,6 @@ router.get('/api/convertprofiles', async (req, res, next) => {
  *               format: binary
  */
 router.post('/api/convert/:profile', async (req, res, next) => {
-  console.log(req.body)
   if (!Buffer.isBuffer(req.body)) {
     return next(new Error('req.body is not a buffer'))
   }
@@ -432,14 +440,11 @@ router.post('/api/convert/:profile', async (req, res, next) => {
     return next(new Error(`Environment variable ${envVarCmd} empty`))
   }
   const envVarOutput = `BOTIUM_SPEECH_CONVERT_PROFILE_${req.params.profile.toUpperCase()}_OUTPUT`
-  if (!process.env[envVarOutput]) {
-    return next(new Error(`Environment variable ${envVarOutput} empty`))
-  }
 
   try {
-    const outputBuffer = await runconvert(process.env[envVarCmd], process.env[envVarOutput], { inputBuffer: req.body, start: req.query.start, end: req.query.end })
+    const outputBuffer = await runconvert(process.env[envVarCmd], process.env[envVarOutput] || 'output.wav', { inputBuffer: req.body, start: req.query.start, end: req.query.end })
     res.writeHead(200, {
-      'Content-disposition': `attachment; filename="${process.env[envVarOutput]}"`,
+      'Content-disposition': `attachment; filename="${process.env[envVarOutput] || 'output.wav'}"`,
       'Content-Length': outputBuffer.length
     })
     res.end(outputBuffer)
diff --git a/frontend/src/swagger.json b/frontend/src/swagger.json
@@ -2,7 +2,7 @@
   "openapi": "3.0.0",
   "info": {
     "title": "Botium Speech Processing API",
-    "version": "1.0.1",
+    "version": "1.0.2",
     "description": "Botium Speech Processing API"
   },
   "basePath": "/",
@@ -325,7 +325,15 @@
             "schema": {
               "type": "array",
               "items": {
-                "type": "string"
+                "type": "object",
+                "properties": {
+                  "name": {
+                    "type": "string"
+                  },
+                  "description": {
+                    "type": "string"
+                  }
+                }
               }
             }
           }
diff --git a/frontend/src/swaggerDef.json b/frontend/src/swaggerDef.json
@@ -2,7 +2,7 @@
     "openapi": "3.0.0",
     "info": {
       "title": "Botium Speech Processing API",
-      "version": "1.0.1",
+      "version": "1.0.2",
       "description": "Botium Speech Processing API"
     },
     "basePath": "/"