Skip to content

Commit c3526f3

Browse files
author
Florian Treml
committed
BOT-2014 additional conversion profiles for audio perturbation
1 parent 33f75e1 commit c3526f3

File tree

6 files changed

+75
-17
lines changed

6 files changed

+75
-17
lines changed

frontend/.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,4 +2,5 @@ node_modules
22
package-lock.json
33
*.local
44
resources/.cache
5+
resources/.tmp
56
resources/google.json

frontend/resources/.env

Lines changed: 27 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -40,8 +40,32 @@ BOTIUM_SPEECH_KALDI_URL_DE=http://stt_de:80/client/dynamic/recognize
4040

4141
# WAV Conversion Command Lines
4242
BOTIUM_SPEECH_CONVERT_PROFILE_WAVTOMONOWAV_CMD=sox -t wav - -r 16k -t wav -c 1 -b 16 -e signed {{{output}}}
43-
BOTIUM_SPEECH_CONVERT_PROFILE_WAVTOMONOWAV_OUTPUT=output.wav
43+
BOTIUM_SPEECH_CONVERT_PROFILE_WAVTOMONOWAV_DESC=Converts WAV file to a Mono Wav, 16khz, 16bit
4444
BOTIUM_SPEECH_CONVERT_PROFILE_MP3TOMONOWAV_CMD=sox -t mp3 - -r 16k -t wav -c 1 -b 16 -e signed {{{output}}}
45-
BOTIUM_SPEECH_CONVERT_PROFILE_MP3TOMONOWAV_OUTPUT=output.wav
45+
BOTIUM_SPEECH_CONVERT_PROFILE_MP3TOMONOWAV_DESC=Converts MP3 file to a Mono Wav, 16khz, 16bit
4646
BOTIUM_SPEECH_CONVERT_PROFILE_WEBMTOMONOWAV_CMD=ffmpeg -i - -f wav - | sox -t wav - -r 16k -t wav -c 1 -b 16 -e signed {{{output}}}
47-
BOTIUM_SPEECH_CONVERT_PROFILE_WEBMTOMONOWAV_OUTPUT=output.wav
47+
BOTIUM_SPEECH_CONVERT_PROFILE_WEBMTOMONOWAV_DESC=Converts WEBM file to a Mono Wav, 16khz, 16bit
48+
49+
# Effects Command Lines
50+
BOTIUM_SPEECH_CONVERT_PROFILE_WAVTOGSM_CMD=sox -t wav - -t wav -r 8k -c 1 -b 8 -e signed {{{output}}} lowpass 2000 highpass 500
51+
BOTIUM_SPEECH_CONVERT_PROFILE_WAVTOGSM_DESC=Makes WAV file sound like a GSM phone call
52+
BOTIUM_SPEECH_CONVERT_PROFILE_WAVADDLOWNOISE_CMD=sox -t wav {{{input}}} -p synth brownnoise vol 0.1 | sox -m {{{input}}} - {{{output}}}
53+
BOTIUM_SPEECH_CONVERT_PROFILE_WAVADDLOWNOISE_DESC=Adds low volume background noise to a WAV file
54+
BOTIUM_SPEECH_CONVERT_PROFILE_WAVADDHIGHNOISE_CMD=sox -t wav {{{input}}} -p synth brownnoise vol 0.3 | sox -m {{{input}}} - {{{output}}}
55+
BOTIUM_SPEECH_CONVERT_PROFILE_WAVADDHIGHNOISE_DESC=Adds high volume background noise to a WAV file
56+
BOTIUM_SPEECH_CONVERT_PROFILE_WAVADDHIGHESTNOISE_CMD=sox -t wav {{{input}}} -p synth brownnoise vol 0.5 | sox -m {{{input}}} - {{{output}}}
57+
BOTIUM_SPEECH_CONVERT_PROFILE_WAVADDHIGHESTNOISE_DESC=Adds killer background noise to a WAV file
58+
BOTIUM_SPEECH_CONVERT_PROFILE_WAVLOWESTVOL_CMD=sox -t wav - -t wav {{{output}}} vol 0.1
59+
BOTIUM_SPEECH_CONVERT_PROFILE_WAVLOWESTVOL_DESC=Minimizes the volume of a WAV file
60+
BOTIUM_SPEECH_CONVERT_PROFILE_WAVLOWVOL_CMD=sox -t wav - -t wav {{{output}}} vol 0.5
61+
BOTIUM_SPEECH_CONVERT_PROFILE_WAVLOWVOL_DESC=Turns down the volume of a WAV file
62+
BOTIUM_SPEECH_CONVERT_PROFILE_WAVHIGHVOL_CMD=sox -t wav - -t wav {{{output}}} vol 2
63+
BOTIUM_SPEECH_CONVERT_PROFILE_WAVHIGHVOL_DESC=Turns up the volume of a WAV file
64+
BOTIUM_SPEECH_CONVERT_PROFILE_WAVHIGHESTVOL_CMD=sox -t wav - -t wav {{{output}}} vol 4
65+
BOTIUM_SPEECH_CONVERT_PROFILE_WAVHIGHESTVOL_DESC=Maximizes the volume of a WAV file
66+
BOTIUM_SPEECH_CONVERT_PROFILE_WAVBREAKX1_CMD=sox -t wav - -t wav {{{output}}} pad [email protected]
67+
BOTIUM_SPEECH_CONVERT_PROFILE_WAVBREAKX1_DESC=Adds one artificial silence break into WAV file
68+
BOTIUM_SPEECH_CONVERT_PROFILE_WAVBREAKX2_CMD=sox -t wav - -t wav {{{output}}} pad [email protected] [email protected]
69+
BOTIUM_SPEECH_CONVERT_PROFILE_WAVBREAKX2_DESC=Adds two artificial silence breaks into WAV file
70+
BOTIUM_SPEECH_CONVERT_PROFILE_WAVBREAKX3_CMD=sox -t wav - -t wav {{{output}}} pad [email protected] [email protected] [email protected]
71+
BOTIUM_SPEECH_CONVERT_PROFILE_WAVBREAKX3_DESC=Adds three artificial silence breaks into WAV file

frontend/src/convert/convert.js

Lines changed: 23 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,20 @@ const debug = require('debug')('botium-speech-processing-convert')
66

77
const runconvert = (cmdLine, outputFile, { inputBuffer, start, end }) => {
88
return new Promise((resolve, reject) => {
9-
const output = `${process.env.BOTIUM_SPEECH_TMP_DIR || '/tmp'}/${uuidv1()}_${outputFile}`
9+
const jobId = uuidv1()
1010

11-
let cmdLineFull = Mustache.render(cmdLine, { output })
11+
const output = `${process.env.BOTIUM_SPEECH_TMP_DIR || '/tmp'}/${jobId}_${outputFile}`
12+
const input = cmdLine.indexOf('{{{input}}}') >= 0 ? `${process.env.BOTIUM_SPEECH_TMP_DIR || '/tmp'}/${jobId}_input` : null
13+
14+
if (input) {
15+
try {
16+
fs.writeFileSync(input, inputBuffer)
17+
} catch (err) {
18+
reject(new Error(`conversion process input file ${input} not writable: ${err.message}`))
19+
}
20+
}
21+
22+
let cmdLineFull = Mustache.render(cmdLine, { output, input })
1223
if (start && end) {
1324
cmdLineFull = `${cmdLineFull} trim ${start} ${end}`
1425
} else if (start && !end) {
@@ -32,6 +43,13 @@ const runconvert = (cmdLine, outputFile, { inputBuffer, start, end }) => {
3243
} else {
3344
reject(new Error(`conversion process exited with code ${code}, signal ${signal}`))
3445
}
46+
if (input) {
47+
try {
48+
fs.unlinkSync(input)
49+
} catch (err) {
50+
debug(`conversion process input file ${input} not deleted: ${err.message}`)
51+
}
52+
}
3553
})
3654
childProcess.once('error', (err) => {
3755
debug(`conversion process failed: ${err.message}`)
@@ -50,7 +68,9 @@ const runconvert = (cmdLine, outputFile, { inputBuffer, start, end }) => {
5068
debug('stderr ' + data)
5169
})
5270

53-
childProcess.stdin.write(inputBuffer)
71+
if (!input) {
72+
childProcess.stdin.write(inputBuffer)
73+
}
5474
childProcess.stdin.end()
5575
})
5676
}

frontend/src/routes.js

Lines changed: 13 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -372,10 +372,19 @@ router.get('/api/tts/:language', async (req, res, next) => {
372372
* schema:
373373
* type: array
374374
* items:
375-
* type: string
375+
* type: object
376+
* properties:
377+
* name:
378+
* type: string
379+
* description:
380+
* type: string
376381
*/
377382
router.get('/api/convertprofiles', async (req, res, next) => {
378-
res.json(Object.keys(process.env).filter(e => e.startsWith('BOTIUM_SPEECH_CONVERT_PROFILE_') && e.endsWith('_CMD')).map(e => e.split('_')[4]))
383+
const keys = Object.keys(process.env).filter(e => e.startsWith('BOTIUM_SPEECH_CONVERT_PROFILE_') && e.endsWith('_CMD')).map(e => e.split('_')[4])
384+
return res.json(keys.map(key => ({
385+
name: key,
386+
description: process.env[`BOTIUM_SPEECH_CONVERT_PROFILE_${key}_DESC`] || ''
387+
})))
379388
})
380389

381390
/**
@@ -423,7 +432,6 @@ router.get('/api/convertprofiles', async (req, res, next) => {
423432
* format: binary
424433
*/
425434
router.post('/api/convert/:profile', async (req, res, next) => {
426-
console.log(req.body)
427435
if (!Buffer.isBuffer(req.body)) {
428436
return next(new Error('req.body is not a buffer'))
429437
}
@@ -432,14 +440,11 @@ router.post('/api/convert/:profile', async (req, res, next) => {
432440
return next(new Error(`Environment variable ${envVarCmd} empty`))
433441
}
434442
const envVarOutput = `BOTIUM_SPEECH_CONVERT_PROFILE_${req.params.profile.toUpperCase()}_OUTPUT`
435-
if (!process.env[envVarOutput]) {
436-
return next(new Error(`Environment variable ${envVarOutput} empty`))
437-
}
438443

439444
try {
440-
const outputBuffer = await runconvert(process.env[envVarCmd], process.env[envVarOutput], { inputBuffer: req.body, start: req.query.start, end: req.query.end })
445+
const outputBuffer = await runconvert(process.env[envVarCmd], process.env[envVarOutput] || 'output.wav', { inputBuffer: req.body, start: req.query.start, end: req.query.end })
441446
res.writeHead(200, {
442-
'Content-disposition': `attachment; filename="${process.env[envVarOutput]}"`,
447+
'Content-disposition': `attachment; filename="${process.env[envVarOutput] || 'output.wav'}"`,
443448
'Content-Length': outputBuffer.length
444449
})
445450
res.end(outputBuffer)

frontend/src/swagger.json

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
"openapi": "3.0.0",
33
"info": {
44
"title": "Botium Speech Processing API",
5-
"version": "1.0.1",
5+
"version": "1.0.2",
66
"description": "Botium Speech Processing API"
77
},
88
"basePath": "/",
@@ -325,7 +325,15 @@
325325
"schema": {
326326
"type": "array",
327327
"items": {
328-
"type": "string"
328+
"type": "object",
329+
"properties": {
330+
"name": {
331+
"type": "string"
332+
},
333+
"description": {
334+
"type": "string"
335+
}
336+
}
329337
}
330338
}
331339
}

frontend/src/swaggerDef.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
"openapi": "3.0.0",
33
"info": {
44
"title": "Botium Speech Processing API",
5-
"version": "1.0.1",
5+
"version": "1.0.2",
66
"description": "Botium Speech Processing API"
77
},
88
"basePath": "/"

0 commit comments

Comments
 (0)