Skip to content

Commit c7b5c96

Browse files
author
Florian Treml
committed
BOT-2014 effects for other file types than wav
1 parent c3526f3 commit c7b5c96

File tree

3 files changed

+97
-43
lines changed

3 files changed

+97
-43
lines changed

frontend/resources/.env

Lines changed: 31 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -41,31 +41,40 @@ BOTIUM_SPEECH_KALDI_URL_DE=http://stt_de:80/client/dynamic/recognize
4141
# WAV Conversion Command Line
4242
BOTIUM_SPEECH_CONVERT_PROFILE_WAVTOMONOWAV_CMD=sox -t wav - -r 16k -t wav -c 1 -b 16 -e signed {{{output}}}
4343
BOTIUM_SPEECH_CONVERT_PROFILE_WAVTOMONOWAV_DESC=Converts WAV file to a Mono Wav, 16khz, 16bit
44+
BOTIUM_SPEECH_CONVERT_PROFILE_WAVTOMONOWAV_OUTPUT=output.wav
4445
BOTIUM_SPEECH_CONVERT_PROFILE_MP3TOMONOWAV_CMD=sox -t mp3 - -r 16k -t wav -c 1 -b 16 -e signed {{{output}}}
4546
BOTIUM_SPEECH_CONVERT_PROFILE_MP3TOMONOWAV_DESC=Converts MP3 file to a Mono Wav, 16khz, 16bit
47+
BOTIUM_SPEECH_CONVERT_PROFILE_MP3TOMONOWAV_OUTPUT=output.wav
4648
BOTIUM_SPEECH_CONVERT_PROFILE_WEBMTOMONOWAV_CMD=ffmpeg -i - -f wav - | sox -t wav - -r 16k -t wav -c 1 -b 16 -e signed {{{output}}}
4749
BOTIUM_SPEECH_CONVERT_PROFILE_WEBMTOMONOWAV_DESC=Converts WEBM file to a Mono Wav, 16khz, 16bit
50+
BOTIUM_SPEECH_CONVERT_PROFILE_WEBMTOMONOWAV_OUTPUT=output.wav
4851

4952
# Effects Command Lines
50-
BOTIUM_SPEECH_CONVERT_PROFILE_WAVTOGSM_CMD=sox -t wav - -t wav -r 8k -c 1 -b 8 -e signed {{{output}}} lowpass 2000 highpass 500
51-
BOTIUM_SPEECH_CONVERT_PROFILE_WAVTOGSM_DESC=Makes WAV file sound like a GSM phone call
52-
BOTIUM_SPEECH_CONVERT_PROFILE_WAVADDLOWNOISE_CMD=sox -t wav {{{input}}} -p synth brownnoise vol 0.1 | sox -m {{{input}}} - {{{output}}}
53-
BOTIUM_SPEECH_CONVERT_PROFILE_WAVADDLOWNOISE_DESC=Adds low volume background noise to a WAV file
54-
BOTIUM_SPEECH_CONVERT_PROFILE_WAVADDHIGHNOISE_CMD=sox -t wav {{{input}}} -p synth brownnoise vol 0.3 | sox -m {{{input}}} - {{{output}}}
55-
BOTIUM_SPEECH_CONVERT_PROFILE_WAVADDHIGHNOISE_DESC=Adds high volume background noise to a WAV file
56-
BOTIUM_SPEECH_CONVERT_PROFILE_WAVADDHIGHESTNOISE_CMD=sox -t wav {{{input}}} -p synth brownnoise vol 0.5 | sox -m {{{input}}} - {{{output}}}
57-
BOTIUM_SPEECH_CONVERT_PROFILE_WAVADDHIGHESTNOISE_DESC=Adds killer background noise to a WAV file
58-
BOTIUM_SPEECH_CONVERT_PROFILE_WAVLOWESTVOL_CMD=sox -t wav - -t wav {{{output}}} vol 0.1
59-
BOTIUM_SPEECH_CONVERT_PROFILE_WAVLOWESTVOL_DESC=Minimizes the volume of a WAV file
60-
BOTIUM_SPEECH_CONVERT_PROFILE_WAVLOWVOL_CMD=sox -t wav - -t wav {{{output}}} vol 0.5
61-
BOTIUM_SPEECH_CONVERT_PROFILE_WAVLOWVOL_DESC=Turns down the volume of a WAV file
62-
BOTIUM_SPEECH_CONVERT_PROFILE_WAVHIGHVOL_CMD=sox -t wav - -t wav {{{output}}} vol 2
63-
BOTIUM_SPEECH_CONVERT_PROFILE_WAVHIGHVOL_DESC=Turns up the volume of a WAV file
64-
BOTIUM_SPEECH_CONVERT_PROFILE_WAVHIGHESTVOL_CMD=sox -t wav - -t wav {{{output}}} vol 4
65-
BOTIUM_SPEECH_CONVERT_PROFILE_WAVHIGHESTVOL_DESC=Maximizes the volume of a WAV file
66-
BOTIUM_SPEECH_CONVERT_PROFILE_WAVBREAKX1_CMD=sox -t wav - -t wav {{{output}}} pad [email protected]
67-
BOTIUM_SPEECH_CONVERT_PROFILE_WAVBREAKX1_DESC=Adds one artificial silence break into WAV file
68-
BOTIUM_SPEECH_CONVERT_PROFILE_WAVBREAKX2_CMD=sox -t wav - -t wav {{{output}}} pad [email protected] [email protected]
69-
BOTIUM_SPEECH_CONVERT_PROFILE_WAVBREAKX2_DESC=Adds two artificial silence breaks into WAV file
70-
BOTIUM_SPEECH_CONVERT_PROFILE_WAVBREAKX3_CMD=sox -t wav - -t wav {{{output}}} pad [email protected] [email protected] [email protected]
71-
BOTIUM_SPEECH_CONVERT_PROFILE_WAVBREAKX3_DESC=Adds three artificial silence breaks into WAV file
53+
BOTIUM_SPEECH_CONVERT_PROFILE_GSM_CMD=sox -t {{{inputtype}}} - -t {{{inputtype}}} -r 8k -c 1 -b 8 -e signed {{{output}}} lowpass 2000 highpass 500
54+
BOTIUM_SPEECH_CONVERT_PROFILE_GSM_DESC=Makes audio sound like a GSM phone call
55+
BOTIUM_SPEECH_CONVERT_PROFILE_ADDLOWNOISE_CMD=sox -t {{{inputtype}}} {{{input}}} -p synth brownnoise vol 0.1 | sox -m {{{input}}} - {{{output}}}
56+
BOTIUM_SPEECH_CONVERT_PROFILE_ADDLOWNOISE_DESC=Adds low volume background noise
57+
BOTIUM_SPEECH_CONVERT_PROFILE_ADDHIGHNOISE_CMD=sox -t {{{inputtype}}} {{{input}}} -p synth brownnoise vol 0.3 | sox -m {{{input}}} - {{{output}}}
58+
BOTIUM_SPEECH_CONVERT_PROFILE_ADDHIGHNOISE_DESC=Adds high volume background noise
59+
BOTIUM_SPEECH_CONVERT_PROFILE_ADDHIGHESTNOISE_CMD=sox -t {{{inputtype}}} {{{input}}} -p synth brownnoise vol 0.5 | sox -m {{{input}}} - {{{output}}}
60+
BOTIUM_SPEECH_CONVERT_PROFILE_ADDHIGHESTNOISE_DESC=Adds killer background noise
61+
BOTIUM_SPEECH_CONVERT_PROFILE_LOWESTVOL_CMD=sox -t {{{inputtype}}} - -t {{{inputtype}}} {{{output}}} vol 0.1
62+
BOTIUM_SPEECH_CONVERT_PROFILE_LOWESTVOL_DESC=Minimizes the volume
63+
BOTIUM_SPEECH_CONVERT_PROFILE_LOWVOL_CMD=sox -t {{{inputtype}}} - -t {{{inputtype}}} {{{output}}} vol 0.5
64+
BOTIUM_SPEECH_CONVERT_PROFILE_LOWVOL_DESC=Turns down the volume
65+
BOTIUM_SPEECH_CONVERT_PROFILE_HIGHVOL_CMD=sox -t {{{inputtype}}} - -t {{{inputtype}}} {{{output}}} vol 2
66+
BOTIUM_SPEECH_CONVERT_PROFILE_HIGHVOL_DESC=Turns up the volume
67+
BOTIUM_SPEECH_CONVERT_PROFILE_HIGHESTVOL_CMD=sox -t {{{inputtype}}} - -t {{{inputtype}}} {{{output}}} vol 4
68+
BOTIUM_SPEECH_CONVERT_PROFILE_HIGHESTVOL_DESC=Maximizes the volume
69+
BOTIUM_SPEECH_CONVERT_PROFILE_BREAKX1_CMD=sox -t {{{inputtype}}} - -t {{{inputtype}}} {{{output}}} pad [email protected]
70+
BOTIUM_SPEECH_CONVERT_PROFILE_BREAKX1_DESC=Adds one artificial silence break
71+
BOTIUM_SPEECH_CONVERT_PROFILE_BREAKX2_CMD=sox -t {{{inputtype}}} - -t {{{inputtype}}} {{{output}}} pad [email protected] [email protected]
72+
BOTIUM_SPEECH_CONVERT_PROFILE_BREAKX2_DESC=Adds two artificial silence breaks
73+
BOTIUM_SPEECH_CONVERT_PROFILE_BREAKX3_CMD=sox -t {{{inputtype}}} - -t {{{inputtype}}} {{{output}}} pad [email protected] [email protected] [email protected]
74+
BOTIUM_SPEECH_CONVERT_PROFILE_BREAKX3_DESC=Adds three artificial silence breaks
75+
BOTIUM_SPEECH_CONVERT_PROFILE_SILENCEX1_CMD=sox -t {{{inputtype}}} - -t {{{inputtype}}} {{{output}}} pad [email protected] trim 0 1.3 0.3
76+
BOTIUM_SPEECH_CONVERT_PROFILE_SILENCEX1_DESC=Replaces one short section with silence
77+
BOTIUM_SPEECH_CONVERT_PROFILE_SILENCEX2_CMD=sox -t {{{inputtype}}} - -t {{{inputtype}}} {{{output}}} pad [email protected] [email protected] trim 0 1.3 0.3 trim 0 2.3 0.3
78+
BOTIUM_SPEECH_CONVERT_PROFILE_SILENCEX2_DESC=Replaces two short sections with silence
79+
BOTIUM_SPEECH_CONVERT_PROFILE_SILENCEX3_CMD=sox -t {{{inputtype}}} - -t {{{inputtype}}} {{{output}}} pad [email protected] [email protected] [email protected] trim 0 1.3 0.3 trim 0 2.3 0.3 trim 0 3.3 0.3
80+
BOTIUM_SPEECH_CONVERT_PROFILE_SILENCEX3_DESC=Replaces three short sections with silence

frontend/src/convert/convert.js

Lines changed: 64 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,33 +1,75 @@
11
const fs = require('fs')
22
const Mustache = require('mustache')
3-
const { spawn } = require('child_process')
3+
const { spawn, exec } = require('child_process')
44
const { v1: uuidv1 } = require('uuid')
55
const debug = require('debug')('botium-speech-processing-convert')
66

7-
const runconvert = (cmdLine, outputFile, { inputBuffer, start, end }) => {
7+
/**
 * Asks SoX (`soxi -t`) for the audio file type of a file on disk.
 *
 * @param {string} filename - path of the file to inspect
 * @returns {Promise<string>} resolves with the trimmed stdout of `soxi -t`;
 *   rejects with an Error when the command fails or writes anything to stderr
 */
const _getSoxFileType = (filename) => {
  // exec() runs the command through a shell, so the path is wrapped in single
  // quotes (with embedded single quotes escaped) to survive spaces and shell
  // metacharacters in the temp directory name.
  const quotedFilename = `'${String(filename).replace(/'/g, "'\\''")}'`

  return new Promise((resolve, reject) => {
    exec(`soxi -t ${quotedFilename}`, (err, stdout, stderr) => {
      if (err) return reject(err)
      // Always reject with an Error so callers can safely read err.message.
      if (stderr) return reject(new Error(stderr.trim()))
      resolve(stdout.trim())
    })
  })
}
16+
17+
/**
 * Checks whether a buffer looks like MP3 data by inspecting its first bytes.
 *
 * Recognised signatures:
 *  - an ID3v2 tag ("ID3")
 *  - an MPEG audio frame header for Layer III in MPEG-1, MPEG-2 or MPEG-2.5
 *    (e.g. 0xFF 0xFB, 0xFF 0xFA, 0xFF 0xF3, 0xFF 0xF2, 0xFF 0xE3, 0xFF 0xE2)
 *
 * @param {Buffer|Uint8Array|number[]} buf - leading bytes of the audio data
 * @returns {boolean} true when the data starts with one of the signatures above;
 *   false for missing input or input shorter than 3 bytes
 */
const _isMP3 = (buf) => {
  if (!buf || buf.length < 3) {
    return false
  }

  // "ID3" - an ID3v2 metadata tag precedes the audio frames
  if (buf[0] === 0x49 && buf[1] === 0x44 && buf[2] === 0x33) {
    return true
  }

  // MPEG audio frame header: 11 sync bits, 2 version bits, 2 layer bits, 1 CRC bit
  const hasFrameSync = buf[0] === 0xff && (buf[1] & 0xe0) === 0xe0
  if (!hasFrameSync) {
    return false
  }
  const versionBits = (buf[1] >> 3) & 0x03 // 0b01 is reserved
  const layerBits = (buf[1] >> 1) & 0x03 // 0b01 means Layer III
  return versionBits !== 0x01 && layerBits === 0x01
}
28+
29+
const runconvert = async (cmdLine, outputName, { inputBuffer, start, end }) => {
30+
const jobId = uuidv1()
1031

11-
const output = `${process.env.BOTIUM_SPEECH_TMP_DIR || '/tmp'}/${jobId}_${outputFile}`
12-
const input = cmdLine.indexOf('{{{input}}}') >= 0 ? `${process.env.BOTIUM_SPEECH_TMP_DIR || '/tmp'}/${jobId}_input` : null
32+
const writeInput = !outputName || cmdLine.indexOf('{{{input}}}') >= 0 || cmdLine.indexOf('{{{inputtype}}}') >= 0
1333

14-
if (input) {
34+
let input = null
35+
let inputtype = null
36+
37+
if (writeInput) {
38+
input = `${process.env.BOTIUM_SPEECH_TMP_DIR || '/tmp'}/${jobId}_input`
39+
try {
40+
fs.writeFileSync(input, inputBuffer)
41+
} catch (err) {
42+
debug(`conversion process input file ${input} not writable: ${err.message}`)
43+
throw new Error('conversion process input file not writable')
44+
}
45+
if (_isMP3(inputBuffer)) {
46+
inputtype = 'mp3'
47+
} else {
1548
try {
16-
fs.writeFileSync(input, inputBuffer)
49+
inputtype = await _getSoxFileType(input)
50+
debug(`Identified input type: ${inputtype}`)
1751
} catch (err) {
18-
reject(new Error(`conversion process input file ${input} not writable: ${err.message}`))
52+
debug(`identification of input file type ${input} failed: ${err.message}`)
53+
throw new Error('identification of input file type failed')
1954
}
2055
}
21-
22-
let cmdLineFull = Mustache.render(cmdLine, { output, input })
23-
if (start && end) {
24-
cmdLineFull = `${cmdLineFull} trim ${start} ${end}`
25-
} else if (start && !end) {
26-
cmdLineFull = `${cmdLineFull} trim ${start}`
27-
} else if (!start && end) {
28-
cmdLineFull = `${cmdLineFull} trim 0 ${end}`
56+
if (!outputName) {
57+
outputName = `output.${inputtype}`
2958
}
30-
debug(`cmdLineFull: ${cmdLineFull}`)
59+
}
60+
const output = `${process.env.BOTIUM_SPEECH_TMP_DIR || '/tmp'}/${jobId}_${outputName}`
61+
62+
let cmdLineFull = Mustache.render(cmdLine, { output, input, inputtype })
63+
if (start && end) {
64+
cmdLineFull = `${cmdLineFull} trim ${start} ${end}`
65+
} else if (start && !end) {
66+
cmdLineFull = `${cmdLineFull} trim ${start}`
67+
} else if (!start && end) {
68+
cmdLineFull = `${cmdLineFull} trim 0 ${end}`
69+
}
70+
debug(`cmdLineFull: ${cmdLineFull}`)
71+
72+
return new Promise((resolve, reject) => {
3173
const childProcess = spawn('/bin/sh', ['-c', cmdLineFull])
3274

3375
childProcess.once('exit', (code, signal) => {
@@ -36,7 +78,10 @@ const runconvert = (cmdLine, outputFile, { inputBuffer, start, end }) => {
3678
try {
3779
const outputBuffer = fs.readFileSync(output)
3880
fs.unlinkSync(output)
39-
resolve(outputBuffer)
81+
resolve({
82+
outputName,
83+
outputBuffer
84+
})
4085
} catch (err) {
4186
reject(new Error(`conversion process output file ${output} not readable: ${err.message}`))
4287
}
@@ -68,7 +113,7 @@ const runconvert = (cmdLine, outputFile, { inputBuffer, start, end }) => {
68113
debug('stderr ' + data)
69114
})
70115

71-
if (!input) {
116+
if (cmdLine.indexOf('{{{input}}}') < 0) {
72117
childProcess.stdin.write(inputBuffer)
73118
}
74119
childProcess.stdin.end()

frontend/src/routes.js

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -442,9 +442,9 @@ router.post('/api/convert/:profile', async (req, res, next) => {
442442
const envVarOutput = `BOTIUM_SPEECH_CONVERT_PROFILE_${req.params.profile.toUpperCase()}_OUTPUT`
443443

444444
try {
445-
const outputBuffer = await runconvert(process.env[envVarCmd], process.env[envVarOutput] || 'output.wav', { inputBuffer: req.body, start: req.query.start, end: req.query.end })
445+
const { outputName, outputBuffer } = await runconvert(process.env[envVarCmd], process.env[envVarOutput], { inputBuffer: req.body, start: req.query.start, end: req.query.end })
446446
res.writeHead(200, {
447-
'Content-disposition': `attachment; filename="${process.env[envVarOutput] || 'output.wav'}"`,
447+
'Content-disposition': `attachment; filename="${outputName}"`,
448448
'Content-Length': outputBuffer.length
449449
})
450450
res.end(outputBuffer)

0 commit comments

Comments
 (0)