forked from schwers/asr-sample-service
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathtranscription.js
More file actions
97 lines (86 loc) · 3.85 KB
/
transcription.js
File metadata and controls
97 lines (86 loc) · 3.85 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
import Axios from "axios";
import { updateTranscriptResultChunkStatus, updateTranscriptResult, updateUserDB, getDBTranscriptResult, getUserJobIds } from "./db.js";
import { STATUS_DONE, STATUS_PENDING, STATUS_FAILED } from "./constants.js";
// Next job ID to hand out, kept as a numeric string; startTranscribeJob reads it and then increments it.
let globalJobId = "1";
/**
 * Requests the transcript of one audio chunk from the ASR endpoint, retrying failed attempts with
 * exponential backoff, and records the chunk's status along the way.
 *
 * The chunk is marked STATUS_PENDING before the first request, STATUS_DONE only after a transcript
 * has been read from a response, and STATUS_FAILED when the last allowed attempt fails.
 *
 * @param {string} jobId - The job the chunk belongs to.
 * @param {string} audioChunkPath - Path of the audio chunk; sent URL-encoded as the `path` query parameter.
 * @param {number} [retries=3] - Retries allowed after the first attempt (at most `retries + 1` requests).
 * @param {number} [backoffDuration=10] - Base delay passed to setTimeout; retry `n` waits `backoffDuration * 2^n`.
 * @returns {Promise<*>} Resolves with `resp.data.transcript`; rejects with the last error once retries are exhausted.
 */
function getASROutput(jobId, audioChunkPath, retries = 3, backoffDuration = 10) {
  updateTranscriptResultChunkStatus({ jobId, audioChunkPath, chunkStatus: STATUS_PENDING });

  const sleep = (ms) => new Promise((resolve) => {
    setTimeout(resolve, ms);
  });

  // NOTE: in production we should cache responses to the ASR model to optimize getting this data
  async function invokeASR() {
    // Encode the path so "&", "#", "?", "=" or spaces in it cannot truncate or corrupt the query string.
    const url = `http://localhost:3000/get-asr-output?path=${encodeURIComponent(audioChunkPath)}`;

    for (let attempt = 0; ; attempt += 1) {
      let transcript;
      try {
        const resp = await Axios.get(url);
        // Read the payload inside the try so a malformed response counts as a failed attempt
        // instead of leaving the chunk marked as done.
        transcript = resp.data.transcript;
      } catch (err) {
        // ">=" (rather than "===") guarantees termination even for a negative retry count.
        if (attempt >= retries) {
          updateTranscriptResultChunkStatus({ jobId, audioChunkPath, chunkStatus: STATUS_FAILED });
          throw err;
        }
        await sleep(backoffDuration * 2 ** attempt);
        continue;
      }

      // Outside the try: an error while recording success must not trigger another ASR request.
      updateTranscriptResultChunkStatus({ jobId, audioChunkPath, chunkStatus: STATUS_DONE });
      return transcript;
    }
  }

  return invokeASR();
}
/**
 * Runs a whole transcription job: requests the transcript of every audio chunk in parallel,
 * joins the transcripts with single spaces, and records the job status for both the transcript
 * result and the user.
 *
 * The job is marked STATUS_PENDING up front. It ends as STATUS_DONE (with the stitched text and a
 * completion timestamp) when every chunk succeeds, or as STATUS_FAILED (with a completion
 * timestamp) when any chunk fails or recording the success throws.
 *
 * @param {Object} params
 * @param {string} params.userId - The user's ID.
 * @param {string} params.jobId - The job ID.
 * @param {string[]} params.audioChunkPaths - The list of paths to the audio chunks.
 * @returns {Promise<string|undefined>} The stitched transcript, or undefined when the job failed.
 */
async function transcribeAndStitch({ userId, jobId, audioChunkPaths }) {
  updateTranscriptResult({ jobId, jobStatus: STATUS_PENDING });
  updateUserDB({ userId, jobId, jobStatus: STATUS_PENDING });

  // Kick off every chunk before awaiting so the requests run concurrently.
  const pendingTranscripts = audioChunkPaths.map((chunkPath) => getASROutput(jobId, chunkPath));

  try {
    const transcripts = await Promise.all(pendingTranscripts);
    const stitched = transcripts.join(" ");
    updateTranscriptResult({
      jobId,
      jobStatus: STATUS_DONE,
      transcriptText: stitched,
      completedTime: new Date().toISOString(),
    });
    updateUserDB({ userId, jobId, jobStatus: STATUS_DONE });
    return stitched;
  } catch {
    // The failure is reported through the stored job status, not by rejecting.
    updateTranscriptResult({
      jobId,
      jobStatus: STATUS_FAILED,
      completedTime: new Date().toISOString(),
    });
    updateUserDB({ userId, jobId, jobStatus: STATUS_FAILED });
    return undefined;
  }
}
/**
 * Starts a transcription job with the audio chunks for the provided user.
 *
 * The job itself runs in the background: this function allocates a job ID, kicks off the
 * transcription, and returns the ID without waiting for the job to finish.
 *
 * @param {Object} params
 * @param {string[]} params.audioChunkPaths - The list of paths to the audio chunks.
 * @param {string} params.userId - The user's ID.
 * @returns {Promise<string>} - Resolves with the job ID that was started.
 * @throws {TypeError} (as a rejection) if `audioChunkPaths` is not an array; no job ID is consumed in that case.
 */
export async function startTranscribeJob({ audioChunkPaths, userId }) {
  // Validate before allocating an ID: a non-array would otherwise only blow up inside the
  // un-awaited background job, after the caller had already been handed a job ID.
  if (!Array.isArray(audioChunkPaths)) {
    throw new TypeError("audioChunkPaths must be an array of audio chunk paths");
  }

  // NOTE: in production we would use a legitimate UUID
  const jobId = globalJobId;
  globalJobId = (Number(globalJobId) + 1).toString();

  // Start the job, processing all the audio chunks in parallel.
  // NOTE: deliberately not awaited, to illustrate that this API call is not blocked by the
  // finishing of the job (the API call only starts the job process).
  // The catch keeps an unexpected rejection from becoming an unhandled rejection.
  transcribeAndStitch({ userId, jobId, audioChunkPaths }).catch((err) => {
    console.error(`Transcription job ${jobId} failed unexpectedly`, err);
  });

  return jobId;
}
/**
 * Looks up the stored transcript result of a job by delegating to the DB layer.
 *
 * @param {string} jobId - The job ID.
 * @returns {TranscriptResult} - Whatever `getDBTranscriptResult` returns for the job: the object describing
 *   the transcribed text, statuses of the audio chunk transcriptions, job status, and completion time.
 */
export function getTranscriptResult(jobId) {
  const transcriptResult = getDBTranscriptResult(jobId);
  return transcriptResult;
}
/**
 * Collects the transcript results of every job that belongs to a user and has the given status.
 *
 * @param {Object} params
 * @param {string} params.jobStatus - The job status.
 * @param {string} params.userId - The user's ID.
 * @returns {TranscriptResult[]|undefined} - One transcript result per matching job ID, in the order the
 *   job IDs are iterated; undefined when `getUserJobIds` yields a falsy value.
 */
export function getUserTranscriptResults({ jobStatus, userId }) {
  const jobIds = getUserJobIds({ userId, jobStatus });
  if (jobIds) {
    // Snapshot the IDs first so the lookups run over a fixed list.
    const idList = [...jobIds];
    const transcriptResults = [];
    for (const id of idList) {
      transcriptResults.push(getTranscriptResult(id));
    }
    return transcriptResults;
  }
  return undefined;
}