Skip to content

Commit b643866

Browse files
BOT-3297 add Content-Duration headers
1 parent 9466b82 commit b643866

File tree

3 files changed

+191
-94
lines changed

3 files changed

+191
-94
lines changed

frontend/src/convert/convert.js

Lines changed: 29 additions & 83 deletions
Original file line numberDiff line numberDiff line change
@@ -1,31 +1,9 @@
11
const fs = require('fs')
22
const Mustache = require('mustache')
3-
const { spawn, exec } = require('child_process')
43
const { v1: uuidv1 } = require('uuid')
4+
const { runShellCommand, getSoxFileType, isBufferMP3, getAudioLengthSeconds } = require('../soxi')
55
const debug = require('debug')('botium-speech-processing-convert')
66

7-
const _getSoxFileType = (filename) => {
8-
return new Promise((resolve, reject) => {
9-
exec(`soxi -t ${filename}`, (err, stdout, stderr) => {
10-
if (err) return reject(err)
11-
if (stderr) return reject(stderr.trim())
12-
resolve(stdout.trim())
13-
})
14-
})
15-
}
16-
17-
const _isMP3 = (buf) => {
18-
if (!buf || buf.length < 3) {
19-
return false
20-
}
21-
return (buf[0] === 73 &&
22-
buf[1] === 68 &&
23-
buf[2] === 51) || (
24-
buf[0] === 255 &&
25-
(buf[1] >= 224)
26-
)
27-
}
28-
297
const pcmtowav = async (inputBuffer, { sampleRate = 16000, bitDepth = 16, channelCount = 1 }) => {
308
const result = await runconvert(`sox -r ${sampleRate} -e signed -b ${bitDepth} -c ${channelCount} {{{input}}} {{{output}}}`, 'output.wav', { inputBuffer, inputType: 'raw' })
319
return result.outputBuffer
@@ -48,11 +26,11 @@ const runconvert = async (cmdLine, outputName, { inputBuffer, inputType, start,
4826
throw new Error('conversion process input file not writable')
4927
}
5028
if (!inputtype) {
51-
if (_isMP3(inputBuffer)) {
29+
if (isBufferMP3(inputBuffer)) {
5230
inputtype = 'mp3'
5331
} else {
5432
try {
55-
inputtype = await _getSoxFileType(input)
33+
inputtype = await getSoxFileType(input)
5634
debug(`Identified input type: ${inputtype}`)
5735
} catch (err) {
5836
debug(`identification of input file type ${input} failed: ${err.message}`)
@@ -84,68 +62,36 @@ const runconvert = async (cmdLine, outputName, { inputBuffer, inputType, start,
8462
}
8563
debug(`cmdLineFull: ${cmdLineFull}`)
8664

87-
return new Promise((resolve, reject) => {
88-
const childProcess = spawn('/bin/sh', ['-c', cmdLineFull])
65+
try {
66+
await runShellCommand(cmdLineFull, cmdLine.indexOf('{{{input}}}') < 0 ? inputBuffer : null)
8967

90-
const childProcessErr = []
91-
const formatChildProcessErr = (header) => {
92-
const lines = [
93-
header,
94-
...childProcessErr
95-
].filter(l => l).map(l => l.trim()).filter(l => l)
96-
return lines.join('\n')
68+
let outputBuffer = null
69+
let outputDuration = null
70+
try {
71+
outputDuration = await getAudioLengthSeconds(output)
72+
} catch (err) {
73+
debug(`no audio length readable for ${output}: ${err.message}`)
9774
}
98-
99-
childProcess.once('exit', (code, signal) => {
100-
debug(`conversion process exited with code ${code}, signal ${signal}`)
101-
if (code === 0) {
102-
try {
103-
const outputBuffer = fs.readFileSync(output)
104-
fs.unlinkSync(output)
105-
resolve({
106-
outputName,
107-
outputBuffer
108-
})
109-
} catch (err) {
110-
reject(new Error(`conversion process output file ${output} not readable: ${err.message}`))
111-
}
112-
} else {
113-
reject(new Error(formatChildProcessErr(`conversion process exited with failure code ${code}${signal ? `, signal ${signal}` : ''}`)))
114-
}
115-
if (input) {
116-
try {
117-
fs.unlinkSync(input)
118-
} catch (err) {
119-
debug(`conversion process input file ${input} not deleted: ${err.message}`)
120-
}
75+
try {
76+
outputBuffer = fs.readFileSync(output)
77+
fs.unlinkSync(output)
78+
} catch (err) {
79+
throw new Error(`conversion process output file ${output} not readable: ${err.message}`)
80+
}
81+
return {
82+
outputName,
83+
outputBuffer,
84+
outputDuration
85+
}
86+
} finally {
87+
if (input) {
88+
try {
89+
fs.unlinkSync(input)
90+
} catch (err) {
91+
debug(`conversion process input file ${input} not deleted: ${err.message}`)
12192
}
122-
})
123-
childProcess.once('error', (err) => {
124-
debug(`conversion process failed: ${err.message}`)
125-
reject(new Error(formatChildProcessErr(`conversion process failed: ${err.message}`)))
126-
})
127-
childProcess.stdout.on('error', (err) => {
128-
debug('stdout err ' + err)
129-
childProcessErr.push(`${err.message}`)
130-
})
131-
childProcess.stderr.on('error', (err) => {
132-
debug('stderr err ' + err)
133-
childProcessErr.push(`${err.message}`)
134-
})
135-
childProcess.stdin.on('error', (err) => {
136-
debug('stdin err ' + err)
137-
childProcessErr.push(`${err.message}`)
138-
})
139-
childProcess.stderr.on('data', (data) => {
140-
debug('stderr ' + data)
141-
childProcessErr.push(`${data}`)
142-
})
143-
144-
if (cmdLine.indexOf('{{{input}}}') < 0) {
145-
childProcess.stdin.write(inputBuffer)
14693
}
147-
childProcess.stdin.end()
148-
})
94+
}
14995
}
15096

15197
module.exports = {

frontend/src/routes.js

Lines changed: 43 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ const contentDisposition = require('content-disposition')
1111
const { WebSocketServer } = require('ws')
1212
const { runconvert } = require('./convert/convert')
1313
const { wer, readBaseUrls } = require('./utils')
14+
const { getAudioLengthSeconds } = require('./soxi')
1415
const debug = require('debug')('botium-speech-processing-routes')
1516

1617
const cachePathStt = (process.env.BOTIUM_SPEECH_CACHE_DIR && path.join(process.env.BOTIUM_SPEECH_CACHE_DIR, 'stt')) || './resources/.cache/stt'
@@ -75,6 +76,24 @@ const extractMultipartContent = (req, res) => new Promise((resolve, reject) => {
7576
})
7677
})
7778

79+
/**
 * Determines the audio length of a file or buffer and, if that works,
 * adds the duration headers to the given header object.
 *
 * This is best-effort: any failure is logged via debug and the headers
 * are handed back without duration information.
 *
 * @param {string} name - label used only in the debug message
 * @param {string|Buffer} filenameOrBuffer - passed through to getAudioLengthSeconds
 * @param {Object} [headers={}] - header object to extend
 * @returns {Promise<Object>} the header object
 */
const _addContentDurationHeadersForFile = async (name, filenameOrBuffer, headers = {}) => {
  try {
    const seconds = await getAudioLengthSeconds(filenameOrBuffer)
    const extendedHeaders = _addContentDurationHeaders(seconds, headers)
    return extendedHeaders
  } catch (lengthErr) {
    // a missing duration must never break the actual response
    debug(`no audio length readable for ${name}: ${lengthErr.message}`)
  }
  return headers
}
88+
89+
/**
 * Adds the audio duration headers to a header object.
 *
 * 'Content-Duration' carries the duration rounded to whole seconds,
 * 'X-Content-Duration' carries it with three decimal places.
 *
 * The headers are only added for a finite, non-negative number. The explicit
 * type check matters: `null >= 0` is true in JavaScript, so a missing duration
 * (null) would otherwise reach `.toFixed()` and throw a TypeError.
 *
 * @param {number|null|undefined} outputDuration - duration in seconds, or null/undefined if unknown
 * @param {Object} [headers={}] - header object to extend (mutated in place)
 * @returns {Object} the same header object
 */
const _addContentDurationHeaders = (outputDuration, headers = {}) => {
  const isUsableDuration = typeof outputDuration === 'number' &&
    Number.isFinite(outputDuration) &&
    outputDuration >= 0
  if (isUsableDuration) {
    headers['Content-Duration'] = outputDuration.toFixed(0)
    headers['X-Content-Duration'] = outputDuration.toFixed(3)
  }
  return headers
}
96+
7897
const router = express.Router()
7998

8099
/**
@@ -416,10 +435,12 @@ router.post('/api/stt/:language', async (req, res, next) => {
416435
const name = fs.readFileSync(cacheFileName).toString()
417436
const buffer = fs.readFileSync(cacheFileBuffer)
418437
debug(`Reading tts result ${cacheFileName} from cache: ${name}`)
419-
res.writeHead(200, {
438+
const headers = {
420439
'Content-disposition': `${contentDisposition(name)}`,
421440
'Content-Length': buffer.length
422-
})
441+
}
442+
await _addContentDurationHeadersForFile(name, cacheFileBuffer, headers)
443+
res.writeHead(200, headers)
423444
return res.end(buffer)
424445
} catch (err) {
425446
debug(`Failed reading tts result ${cacheFileName} from cache: ${err.message}`)
@@ -435,10 +456,12 @@ router.post('/api/stt/:language', async (req, res, next) => {
435456
voice: req.query.voice,
436457
text: req.query.text
437458
})
438-
res.writeHead(200, {
459+
const headers = {
439460
'Content-disposition': `${contentDisposition(name)}`,
440461
'Content-Length': buffer.length
441-
})
462+
}
463+
await _addContentDurationHeadersForFile(name, buffer, headers)
464+
res.writeHead(200, headers)
442465
res.end(buffer)
443466

444467
if (!skipCache && cachePathTts) {
@@ -550,11 +573,13 @@ router.post('/api/convert/:profile', async (req, res, next) => {
550573
const envVarOutput = `BOTIUM_SPEECH_CONVERT_PROFILE_${req.params.profile.toUpperCase()}_OUTPUT`
551574

552575
try {
553-
const { outputName, outputBuffer } = await runconvert(process.env[envVarCmd], process.env[envVarOutput], { inputBuffer: buffer, start: req.query.start, end: req.query.end })
554-
res.writeHead(200, {
576+
const { outputName, outputBuffer, outputDuration } = await runconvert(process.env[envVarCmd], process.env[envVarOutput], { inputBuffer: buffer, start: req.query.start, end: req.query.end })
577+
const headers = {
555578
'Content-disposition': `attachment; filename="${outputName}"`,
556579
'Content-Length': outputBuffer.length
557-
})
580+
}
581+
_addContentDurationHeaders(outputDuration, headers)
582+
res.writeHead(200, headers)
558583
res.end(outputBuffer)
559584
} catch (err) {
560585
return next(err)
@@ -624,6 +649,7 @@ router.post('/api/convert', async (req, res, next) => {
624649
const profiles = _.isString(req.query.profile) ? [req.query.profile] : _.isArray(req.query.profile) ? req.query.profile : []
625650
let transformBuffer = buffer
626651
let transformName = null
652+
let transformDuration = null
627653
for (const profile of profiles) {
628654
const envVarCmd = `BOTIUM_SPEECH_CONVERT_PROFILE_${profile.toUpperCase()}_CMD`
629655
if (!process.env[envVarCmd]) {
@@ -632,17 +658,20 @@ router.post('/api/convert', async (req, res, next) => {
632658
const envVarOutput = `BOTIUM_SPEECH_CONVERT_PROFILE_${profile.toUpperCase()}_OUTPUT`
633659

634660
try {
635-
const { outputName, outputBuffer } = await runconvert(process.env[envVarCmd], process.env[envVarOutput], { inputBuffer: transformBuffer, start: req.query.start, end: req.query.end })
661+
const { outputName, outputBuffer, outputDuration } = await runconvert(process.env[envVarCmd], process.env[envVarOutput], { inputBuffer: transformBuffer, start: req.query.start, end: req.query.end })
636662
transformBuffer = outputBuffer
637663
transformName = outputName
664+
transformDuration = outputDuration
638665
} catch (err) {
639666
return next(err)
640667
}
641668
}
642-
res.writeHead(200, {
669+
const headers = {
643670
'Content-disposition': `attachment; filename="${transformName}"`,
644671
'Content-Length': transformBuffer.length
645-
})
672+
}
673+
_addContentDurationHeaders(transformDuration, headers)
674+
res.writeHead(200, headers)
646675
res.end(transformBuffer)
647676
})
648677

@@ -726,6 +755,7 @@ const wssStreams = {}
726755
const streamId = uuidv1()
727756
const stream = await stt.stt_OpenStream(req, { language: req.params.language })
728757
stream.events.on('close', () => delete wssStreams[streamId])
758+
stream.dateTimeStart = new Date()
729759
wssStreams[streamId] = stream
730760

731761
const baseUrls = readBaseUrls(req)
@@ -764,7 +794,8 @@ const wssStreams = {}
764794
;[router.get.bind(router), router.post.bind(router)].forEach(m => m('/api/sttstatus/:streamId', async (req, res, next) => {
765795
const stream = wssStreams[req.params.streamId]
766796
if (stream) {
767-
res.status(200).json({ status: 'OK', streamId: req.params.streamId })
797+
const streamDuration = ((new Date() - stream.dateTimeStart) / 1000).toFixed(3)
798+
res.status(200).json({ status: 'OK', streamId: req.params.streamId, streamDuration })
768799
} else {
769800
res.status(404).json({ status: 'NOTFOUND', streamId: req.params.streamId })
770801
}
@@ -810,6 +841,7 @@ const wssUpgrade = (req, socket, head) => {
810841
const wss1 = new WebSocketServer({ noServer: true })
811842
wss1.on('connection', async (ws) => {
812843
stream.events.on('data', (data) => {
844+
data.streamDuration = ((new Date() - stream.dateTimeStart) / 1000).toFixed(3)
813845
ws.send(JSON.stringify(data))
814846
})
815847
stream.events.on('close', () => {

frontend/src/soxi.js

Lines changed: 119 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,119 @@
1+
const { spawn, exec } = require('child_process')
2+
const debug = require('debug')('botium-speech-processing-soxi')
3+
4+
/**
 * Runs a command line through `/bin/sh -c` and collects what it prints.
 *
 * @param {string} cmdLineFull - complete shell command line, handed to the shell verbatim
 * @param {Buffer|string|null} [inputBuffer=null] - optional data written to the child's stdin
 * @returns {Promise<{childProcessOutput: string[]}>} resolves with the stdout chunks
 *   (in arrival order) when the process ends with exit code 0; rejects with an Error
 *   whose message is a header line followed by everything collected from stderr and
 *   from stream errors
 */
const runShellCommand = (cmdLineFull, inputBuffer = null) => {
  return new Promise((resolve, reject) => {
    const childProcess = spawn('/bin/sh', ['-c', cmdLineFull])

    const childProcessOutput = []
    const childProcessErr = []
    const formatChildProcessErr = (header) => {
      const lines = [
        header,
        ...childProcessErr
      ].filter(l => l).map(l => l.trim()).filter(l => l)
      return lines.join('\n')
    }

    // Decode at stream level so multi-byte characters split across chunks stay intact
    childProcess.stdout.setEncoding('utf8')
    childProcess.stderr.setEncoding('utf8')

    // 'close' instead of 'exit': 'exit' may fire while stdout/stderr still hold
    // unread data, which would hand incomplete output to the caller.
    childProcess.once('close', (code, signal) => {
      if (code === 0) {
        resolve({
          childProcessOutput
        })
      } else {
        reject(new Error(formatChildProcessErr(`process exited with failure code ${code}${signal ? `, signal ${signal}` : ''}`)))
      }
    })
    childProcess.once('error', (err) => {
      reject(new Error(formatChildProcessErr(err.message)))
    })
    childProcess.stdout.on('error', (err) => {
      debug('stdout err ' + err)
      childProcessErr.push(`${err.message}`)
    })
    childProcess.stdout.on('data', (data) => {
      debug('stdout ' + data)
      childProcessOutput.push(`${data}`)
    })
    childProcess.stderr.on('error', (err) => {
      debug('stderr err ' + err)
      childProcessErr.push(`${err.message}`)
    })
    childProcess.stderr.on('data', (data) => {
      debug('stderr ' + data)
      childProcessErr.push(`${data}`)
    })
    // Without this listener a write to an already finished child (EPIPE) would crash the process
    childProcess.stdin.on('error', (err) => {
      debug('stdin err ' + err)
      childProcessErr.push(`${err.message}`)
    })

    if (inputBuffer) {
      childProcess.stdin.write(inputBuffer)
    }
    childProcess.stdin.end()
  })
}
57+
58+
/**
 * Asks `soxi -t` for the audio file type of a file.
 *
 * NOTE(review): filename is interpolated into a shell command line without
 * escaping - only pass trusted, internally generated paths.
 *
 * @param {string} filename - path of the file to inspect
 * @returns {Promise<string>} resolves with the trimmed stdout of soxi;
 *   rejects with an Error if the command fails or prints anything to stderr
 */
const getSoxFileType = (filename) => {
  return new Promise((resolve, reject) => {
    exec(`soxi -t ${filename}`, (err, stdout, stderr) => {
      if (err) return reject(err)
      // always reject with an Error so callers can rely on err.message
      if (stderr) return reject(new Error(stderr.trim()))
      resolve(stdout.trim())
    })
  })
}
67+
68+
/**
 * Reads the audio duration in seconds using `soxi -D`.
 *
 * A Buffer is piped to `soxi -D -` on stdin, anything else is treated as a
 * file name and appended to the soxi command line.
 *
 * NOTE(review): a file name is interpolated into a shell command line without
 * escaping - only pass trusted, internally generated paths.
 *
 * @param {string|Buffer} filenameOrBuffer - audio file path or audio content
 * @returns {Promise<number>} resolves with the duration in seconds; rejects with an
 *   Error if soxi fails, prints to stderr (file name variant), prints nothing,
 *   or prints something that is not a finite number
 */
const getAudioLengthSeconds = async (filenameOrBuffer) => {
  // parseFloat never throws - it yields NaN - so the result has to be checked explicitly
  const parseDuration = (raw) => {
    const out = `${raw}`.trim()
    const seconds = Number.parseFloat(out)
    if (!Number.isFinite(seconds)) {
      throw new Error(`Parsing SOXI output "${out}" failed: not a number`)
    }
    return seconds
  }

  if (Buffer.isBuffer(filenameOrBuffer)) {
    const { childProcessOutput } = await runShellCommand('soxi -D -', filenameOrBuffer)
    if (childProcessOutput.length === 0) {
      throw new Error('Parsing SOXI output failed: output empty')
    }
    // stdout may arrive in several chunks, so parse the complete output
    return parseDuration(childProcessOutput.join(''))
  }

  return new Promise((resolve, reject) => {
    exec(`soxi -D ${filenameOrBuffer}`, (err, stdout, stderr) => {
      if (err) return reject(err)
      // always reject with an Error so callers can rely on err.message
      if (stderr) return reject(new Error(stderr.trim()))
      try {
        resolve(parseDuration(stdout))
      } catch (parseErr) {
        reject(parseErr)
      }
    })
  })
}
101+
102+
/**
 * Checks whether a buffer starts like MP3 data.
 *
 * Two signatures are accepted: the three bytes "ID3", or a first byte of
 * 0xFF followed by a byte of at least 0xE0.
 *
 * @param {Buffer|Array<number>|null|undefined} buf - data to inspect
 * @returns {boolean} true if one of the signatures matches; false for
 *   missing input or input shorter than three bytes
 */
const isBufferMP3 = (buf) => {
  const hasEnoughBytes = Boolean(buf) && buf.length >= 3
  if (!hasEnoughBytes) return false

  const first = buf[0]
  const second = buf[1]

  // 0x49 0x44 0x33 spells "ID3"
  if (first === 0x49 && second === 0x44 && buf[2] === 0x33) return true

  return first === 0xff && second >= 0xe0
}
113+
114+
// Exports of the soxi helper module: the shell runner plus the audio inspection helpers defined in this file.
module.exports = {
115+
runShellCommand,
116+
getSoxFileType,
117+
getAudioLengthSeconds,
118+
isBufferMP3
119+
}

0 commit comments

Comments
 (0)