Skip to content

Commit 3ff96c8

Browse files
author
Florian Treml
committed
BOT-2463 add support for AWS Transcribe
1 parent 99c0072 commit 3ff96c8

File tree

4 files changed

+104
-12
lines changed

4 files changed

+104
-12
lines changed

frontend/package.json

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,10 @@
1313
"author": "Botium GmbH",
1414
"license": "MIT",
1515
"dependencies": {
16-
"@aws-sdk/client-polly": "^3.47.1",
17-
"@aws-sdk/client-transcribe-streaming": "^3.47.1",
16+
"@aws-sdk/client-polly": "^3.48.0",
17+
"@aws-sdk/client-s3": "^3.48.0",
18+
"@aws-sdk/client-transcribe": "^3.48.0",
19+
"@aws-sdk/client-transcribe-streaming": "^3.48.0",
1820
"@google-cloud/speech": "^4.10.0",
1921
"@google-cloud/storage": "^5.18.0",
2022
"@google-cloud/text-to-speech": "^3.4.0",

frontend/resources/.env

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,7 @@ BOTIUM_SPEECH_AZURE_REGION=
5757
BOTIUM_SPEECH_AWS_REGION=
5858
BOTIUM_SPEECH_AWS_ACCESS_KEY_ID=
5959
BOTIUM_SPEECH_AWS_SECRET_ACCESS_KEY=
60+
BOTIUM_SPEECH_AWS_S3_BUCKET=
6061

6162
# WAV Conversion Command Line
6263
BOTIUM_SPEECH_CONVERT_PROFILE_WAVTOMONOWAV_CMD=sox -t wav - -r 16k -t wav -c 1 -b 16 -e signed {{{output}}}

frontend/src/stt/awstranscribe.js

Lines changed: 93 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,15 @@
11
const _ = require('lodash')
2-
const { TranscribeStreamingClient, StartStreamTranscriptionCommand } = require('@aws-sdk/client-transcribe-streaming')
2+
const { v1: uuidv1 } = require('uuid')
3+
const axios = require('axios').default
4+
const { TranscribeStreamingClient, StartStreamTranscriptionCommand, MediaEncoding } = require('@aws-sdk/client-transcribe-streaming')
5+
const { TranscribeClient, StartTranscriptionJobCommand, GetTranscriptionJobCommand, DeleteTranscriptionJobCommand, TranscriptionJobStatus } = require('@aws-sdk/client-transcribe')
6+
const { S3Client, PutObjectCommand, DeleteObjectCommand } = require('@aws-sdk/client-s3')
37
const { PassThrough } = require('stream')
48
const EventEmitter = require('events')
59

610
const debug = require('debug')('botium-speech-processing-awstranscribe-stt')
711

8-
const { awstranscribeOptions } = require('../utils')
12+
const { awstranscribeOptions, applyIfExists } = require('../utils')
913

1014
const languageCodes = [
1115
'af-ZA',
@@ -67,19 +71,18 @@ class AwsTranscribeSTT {
6771

6872
const request = {
6973
LanguageCode: language,
70-
MediaEncoding: 'pcm',
74+
MediaEncoding: MediaEncoding.PCM,
7175
MediaSampleRateHertz: 16000,
7276
AudioStream: audioStream()
7377
}
74-
if (req.body && req.body.awstranscribe && req.body.awstranscribe.config) {
75-
Object.assign(request, req.body.awstranscribe.config)
76-
}
78+
applyIfExists(request, req, 'req.body.awstranscribe.config.streaming')
7779

7880
const events = new EventEmitter()
7981
try {
8082
const cmdResponse = await transcribeClient.send(new StartStreamTranscriptionCommand(request))
8183
setTimeout(async () => {
8284
try {
85+
debug('Starting to listen to TranscriptResultStream ')
8386
for await (const event of cmdResponse.TranscriptResultStream) {
8487
const results = _.get(event, 'TranscriptEvent.Transcript.Results')
8588
if (results && results.length > 0) {
@@ -96,15 +99,17 @@ class AwsTranscribeSTT {
9699
}
97100
}
98101
} catch (err) {
102+
debug(`TranscriptResultStream failure: ${err.Message || err.message || err}`)
99103
events.emit('data', {
100-
err: `${err.message}`
104+
err: `${err.message || err}`
101105
})
102106
}
103107
events.emit('close')
108+
debug('Ready listening to TranscriptResultStream ')
104109
}, 0)
105110
} catch (err) {
106-
debug(err)
107-
throw new Error(`AWS Transcribe STT streaming failed: ${err.message}`)
111+
debug(`StartStreamTranscriptionCommand failure: ${err.Message || err.message || err}`)
112+
throw new Error(`AWS Transcribe STT streaming failed: ${err.Message || err.message || err}`)
108113
}
109114
return {
110115
events,
@@ -126,7 +131,86 @@ class AwsTranscribeSTT {
126131
}
127132

128133
async stt (req, { language, buffer, hint }) {
134+
const transcribeClient = new TranscribeClient(awstranscribeOptions(req))
135+
const s3Client = new S3Client(awstranscribeOptions(req))
136+
137+
const jobId = uuidv1()
138+
139+
const putRequest = {
140+
Bucket: _.get(req, 'body.awstranscribe.credentials.bucket') || process.env.BOTIUM_SPEECH_AWS_S3_BUCKET || 'botium-speech-processing',
141+
Key: `botium-transcribe-source-${jobId}`
142+
}
143+
applyIfExists(putRequest, req, 'req.body.awstranscribe.config.s3')
129144

145+
const transcribeJobRequest = {
146+
TranscriptionJobName: `botium-transcribe-job-${jobId}`,
147+
LanguageCode: language,
148+
Media: {
149+
MediaFileUri: `s3://${putRequest.Bucket}/${putRequest.Key}`
150+
}
151+
}
152+
applyIfExists(putRequest, req, 'req.body.awstranscribe.config.transcribe')
153+
154+
try {
155+
await s3Client.send(new PutObjectCommand({
156+
...putRequest,
157+
Body: buffer
158+
}))
159+
} catch (err) {
160+
throw new Error(`S3 Upload to ${putRequest.Bucket}/${putRequest.Key} failure: ${err.message || err}`)
161+
}
162+
163+
try {
164+
let transcriptionJob = null
165+
try {
166+
const transcribeJobResponse = await transcribeClient.send(new StartTranscriptionJobCommand(transcribeJobRequest))
167+
transcriptionJob = transcribeJobResponse.TranscriptionJob
168+
} catch (err) {
169+
throw new Error(`Creating Transcription Job for ${transcribeJobRequest.Media.MediaFileUri} failure: ${err.message || err}`)
170+
}
171+
172+
while (true) {
173+
try {
174+
const jobStatus = await transcribeClient.send(new GetTranscriptionJobCommand({
175+
TranscriptionJobName: transcriptionJob.TranscriptionJobName
176+
}))
177+
debug(`Checking Transcription Job for ${transcribeJobRequest.Media.MediaFileUri} status: ${JSON.stringify(jobStatus.TranscriptionJob)}`)
178+
if (jobStatus.TranscriptionJob.TranscriptionJobStatus === TranscriptionJobStatus.COMPLETED) {
179+
try {
180+
const transcriptionFile = await axios.get(jobStatus.TranscriptionJob.Transcript.TranscriptFileUri)
181+
return {
182+
text: _.get(transcriptionFile.data, 'results.transcripts[0].transcript'),
183+
debug: transcriptionFile.data
184+
}
185+
} catch (err) {
186+
throw new Error(`Downloading Transcription Result for ${transcribeJobRequest.Media.MediaFileUri} failure: ${err.message || err}`)
187+
}
188+
} else if (jobStatus.TranscriptionJob.TranscriptionJobStatus === TranscriptionJobStatus.FAILED) {
189+
throw new Error(`Transcription Job for ${transcribeJobRequest.Media.MediaFileUri} failed, reason: ${jobStatus.TranscriptionJob.FailureReason}`)
190+
} else {
191+
await new Promise(resolve => setTimeout(resolve, 1000))
192+
}
193+
} catch (err) {
194+
throw new Error(`Checking Transcription Job Status for ${transcribeJobRequest.Media.MediaFileUri} failure: ${err.message || err}`)
195+
}
196+
}
197+
} finally {
198+
try {
199+
await s3Client.send(new DeleteObjectCommand({
200+
Bucket: putRequest.Bucket,
201+
Key: putRequest.Key
202+
}))
203+
} catch (err) {
204+
debug(`Deleting S3 Object ${putRequest.Bucket}/${putRequest.Key} failure: ${err.message || err}`)
205+
}
206+
try {
207+
await transcribeClient.send(new DeleteTranscriptionJobCommand({
208+
TranscriptionJobName: transcribeJobRequest.TranscriptionJobName
209+
}))
210+
} catch (err) {
211+
debug(`Deleting Transcription Job ${transcribeJobRequest.TranscriptionJobName} failure: ${err.message || err}`)
212+
}
213+
}
130214
}
131215
}
132216

frontend/src/utils.js

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -166,6 +166,10 @@ const readBaseUrls = (req) => {
166166
}
167167
}
168168

169+
const applyIfExists = (target, src, p) => {
170+
return Object.assign(target, _.get(src, p) || {})
171+
}
172+
169173
module.exports = {
170174
asJson,
171175
enumValueToName,
@@ -179,5 +183,6 @@ module.exports = {
179183
azureSpeechConfig,
180184
applyExtraAzureSpeechConfig,
181185
getAzureErrorDetails,
182-
readBaseUrls
186+
readBaseUrls,
187+
applyIfExists
183188
}

0 commit comments

Comments
 (0)