|
| 1 | +// SPDX-FileCopyrightText: 2024 LiveKit, Inc. |
| 2 | +// |
| 3 | +// SPDX-License-Identifier: Apache-2.0 |
| 4 | + |
| 5 | +import { STT, SpeechEvent, SpeechEventType, SpeechStream } from './stt'; |
| 6 | +import { VADEventType, VADStream } from '../vad'; |
| 7 | +import { AudioFrame } from '@livekit/rtc-node'; |
| 8 | +import { AudioBuffer, mergeFrames } from '../utils'; |
| 9 | + |
/**
 * Speech stream that emulates streaming recognition on top of a non-streaming
 * STT: audio frames are forwarded to a VAD stream, and every time the VAD
 * reports the end of a speech segment the collected frames are merged and
 * passed to `stt.recognize()`. Resulting events are buffered in `eventQueue`
 * and handed out through `next()`.
 */
export class StreamAdapterWrapper extends SpeechStream {
  /** Set once `close()` or `task.cancel()` has been called; blocks `pushFrame`. */
  closed: boolean;
  /** Recognizer invoked once per completed speech segment. */
  stt: STT;
  /** VAD stream that receives the pushed frames and yields speech boundaries. */
  vadStream: VADStream;
  /** Pending events; an `undefined` entry marks the end of the stream. */
  eventQueue: (SpeechEvent | undefined)[];
  /** Language forwarded verbatim to `stt.recognize()`. */
  language: string | undefined;
  /**
   * Background processing task. `run` settles when the VAD loop finishes
   * (resolve), fails (reject) or is cancelled (reject with `'cancelled'`);
   * `cancel` aborts the loop.
   */
  task: {
    run: Promise<void>;
    cancel: () => void;
  };

  /**
   * @param stt - recognizer used for each speech segment
   * @param vadStream - VAD stream used to segment the incoming audio
   * @param language - optional language passed through to the recognizer
   */
  constructor(stt: STT, vadStream: VADStream, language: string | undefined = undefined) {
    super();
    this.closed = false;
    this.stt = stt;
    this.vadStream = vadStream;
    this.eventQueue = [];
    this.language = language;

    // A Promise executor runs synchronously, so the settle callbacks are
    // captured here and `run()` is only started after `this.task` exists;
    // `run()` installs `this.task.cancel` as its very first statement.
    let resolveRun!: () => void;
    let rejectRun!: (err: Error) => void;
    const run = new Promise<void>((resolve, reject) => {
      resolveRun = () => resolve();
      rejectRun = (err) => reject(err);
    });
    // Keep a rejection (cancellation or recognizer failure) from being reported
    // as unhandled before `close()` gets to await it. Awaiters of `run` still
    // observe the rejection.
    run.catch(() => {});

    this.task = { run, cancel: () => {} };
    void this.run(rejectRun).then(resolveRun, rejectRun);
  }

  /**
   * Consumes the VAD stream and translates its events into speech events.
   *
   * @param reject - rejects `task.run`; called with `Error('cancelled')` when
   *   `task.cancel()` is invoked
   */
  async run(reject: (arg: Error) => void) {
    let cancelled = false;
    this.task.cancel = () => {
      cancelled = true;
      this.closed = true;
      reject(new Error('cancelled'));
    };

    // NOTE(review): this is a synchronous `for...of` over the VAD stream —
    // confirm VADStream is a sync iterable that only ends once it is closed.
    for (const event of this.vadStream) {
      if (cancelled) {
        return;
      }

      if (event.type === VADEventType.START_OF_SPEECH) {
        const startEvent = new SpeechEvent(SpeechEventType.START_OF_SPEECH);
        this.eventQueue.push(startEvent);
      } else if (event.type === VADEventType.END_OF_SPEECH) {
        const mergedFrames = mergeFrames(event.speech);
        const endEvent = await this.stt.recognize(mergedFrames, this.language);
        // The stream may have been cancelled while recognition was in flight;
        // do not enqueue results nobody asked for.
        if (cancelled) {
          return;
        }
        this.eventQueue.push(endEvent);
      }
    }

    // End-of-stream marker.
    this.eventQueue.push(undefined);
  }

  /**
   * Forwards an audio frame to the VAD stream.
   *
   * @throws TypeError if the stream has already been closed
   */
  pushFrame(frame: AudioFrame) {
    if (this.closed) {
      throw new TypeError('cannot push frame to closed stream');
    }

    this.vadStream.pushFrame(frame);
  }

  /**
   * Closes the stream and waits for the background task to settle.
   *
   * @param wait - when `false`, the background task is cancelled instead of
   *   being allowed to finish
   * @throws any error raised by the background task other than the
   *   cancellation triggered through `task.cancel()`
   */
  async close(wait: boolean = true): Promise<void> {
    this.closed = true;

    if (!wait) {
      this.task.cancel();
    }

    await this.vadStream.close(wait);

    try {
      await this.task.run;
    } catch (err: unknown) {
      // Cancellation is an expected outcome of closing, not a failure.
      const wasCancelled = err instanceof Error && err.message === 'cancelled';
      if (!wasCancelled) {
        throw err;
      }
    }
  }

  /**
   * Pops the next buffered event. Reports `done` both for the end-of-stream
   * marker and when the queue is currently empty.
   */
  next(): IteratorResult<SpeechEvent> {
    const item = this.eventQueue.shift();
    if (item) {
      return { done: false, value: item };
    } else {
      return { done: true, value: undefined };
    }
  }
}
| 84 | + |
/**
 * STT wrapper that pairs an existing recognizer with a VAD stream so that
 * `stream()` can be offered on top of `recognize()`.
 */
export class StreamAdapter extends STT {
  /** Wrapped recognizer; all recognition requests are delegated to it. */
  stt: STT;
  /** VAD stream handed to every wrapper created by `stream()`. */
  vadStream: VADStream;

  /**
   * @param stt - recognizer to delegate to
   * @param vadStream - VAD stream used by the streams this adapter creates
   */
  constructor(stt: STT, vadStream: VADStream) {
    // NOTE(review): the `true` flag presumably advertises streaming support
    // to the base class — confirm against STT's constructor.
    super(true);
    this.vadStream = vadStream;
    this.stt = stt;
  }

  /**
   * Recognizes a complete audio buffer by delegating to the wrapped STT.
   *
   * @param buffer - audio to transcribe
   * @param language - optional language passed through unchanged
   * @returns the speech event produced by the wrapped recognizer
   */
  async recognize(
    buffer: AudioBuffer,
    language: string | undefined = undefined,
  ): Promise<SpeechEvent> {
    const result = await this.stt.recognize(buffer, language);
    return result;
  }

  /**
   * Creates a VAD-segmented speech stream backed by the wrapped STT.
   * Every returned wrapper receives the same `vadStream` instance held by
   * this adapter.
   *
   * @param language - optional language used for each recognized segment
   */
  stream(language: string | undefined = undefined) {
    const { stt, vadStream } = this;
    return new StreamAdapterWrapper(stt, vadStream, language);
  }
}
0 commit comments