Skip to content

Commit 5494fae

Browse files
committed
Add TTS and tokenizer
1 parent 1b70eda commit 5494fae

File tree

8 files changed

+185
-8
lines changed

8 files changed

+185
-8
lines changed

bun.lockb

691 Bytes
Binary file not shown.

package.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
"typescript": "^5.0.0"
2525
},
2626
"dependencies": {
27+
"commander": "^12.0.0",
2728
"@livekit/protocol": "^1.12.0"
2829
}
2930
}

src/index.ts

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,6 @@
22
//
33
// SPDX-License-Identifier: Apache-2.0
44

5-
export { VAD, VADEventType, VADStream } from './vad';
6-
export { Plugin } from './plugin';
7-
export { version } from './version';
5+
export * from './vad';
6+
export * from './plugin';
7+
export * from './version';

src/tokenize.ts

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
// SPDX-FileCopyrightText: 2024 LiveKit, Inc.
2+
//
3+
// SPDX-License-Identifier: Apache-2.0
4+
5+
/** A single sentence produced by a sentence tokenizer. */
export interface SegmentedSentence {
  /** The text of the sentence. */
  text: string;
}
8+
9+
/**
 * Splits text into sentences, either in one shot ({@link tokenize}) or
 * incrementally through a stream ({@link stream}).
 */
export abstract class SentenceTokenizer {
  /**
   * Splits `text` into sentences.
   *
   * @param text - the text to split
   * @param language - optional language passed through to the implementation;
   *   may be omitted or `undefined`
   * @returns the sentences found in `text`
   */
  abstract tokenize(text: string, language?: string): SegmentedSentence[];

  /**
   * Creates a stream that accepts text incrementally and yields sentences.
   *
   * @param language - optional language passed through to the implementation;
   *   may be omitted or `undefined`
   */
  abstract stream(language?: string): SentenceStream;
}
13+
14+
/**
 * Incremental sentence tokenizer: text is pushed in with {@link pushText} and
 * sentences are pulled out through the synchronous iterator protocol.
 */
export abstract class SentenceStream implements IterableIterator<SegmentedSentence> {
  /** Feeds more text into the stream. */
  abstract pushText(text: string): void;

  /**
   * Flushes the stream.
   * NOTE(review): presumably forces any buffered text out as sentences —
   * confirm against concrete implementations.
   */
  abstract flush(): Promise<void>;

  /** Closes the stream. The default implementation does nothing. */
  async close(): Promise<void> {}

  /** Returns the next sentence, or a `done` result when none is available. */
  abstract next(): IteratorResult<SegmentedSentence>;

  /** The stream is its own iterator, so it can be used directly in `for...of`. */
  [Symbol.iterator](): SentenceStream {
    return this;
  }
}

src/tts/index.ts

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
// SPDX-FileCopyrightText: 2024 LiveKit, Inc.
2+
//
3+
// SPDX-License-Identifier: Apache-2.0
4+
5+
export { TTS, SynthesisEvent, SynthesisEventType, SynthesizedAudio, SynthesizeStream } from './tts';
6+
export { StreamAdapter, StreamAdapterWrapper } from './stream_adapter';

src/tts/stream_adapter.ts

Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,91 @@
1+
// SPDX-FileCopyrightText: 2024 LiveKit, Inc.
2+
//
3+
// SPDX-License-Identifier: Apache-2.0
4+
5+
import { TTS, SynthesisEvent, SynthesisEventType, SynthesizedAudio, SynthesizeStream } from './tts';
6+
import { SentenceStream, SentenceTokenizer } from '../tokenize';
7+
8+
/**
 * Exposes a non-streaming TTS through the SynthesizeStream interface.
 *
 * Text pushed into the wrapper is forwarded to a sentence stream. A background
 * task pulls sentences from that stream, synthesizes each one with
 * `tts.synthesize`, and queues a STARTED / AUDIO / FINISHED event triple that
 * consumers drain through {@link next}.
 */
export class StreamAdapterWrapper extends SynthesizeStream {
  /** Set by `task.cancel()`; makes the background task stop. */
  closed: boolean;
  /** Engine used to synthesize each sentence. */
  tts: TTS;
  /** Source of sentences to synthesize. */
  sentenceStream: SentenceStream;
  /** Events waiting to be returned by `next()`; `close()` appends `undefined`. */
  eventQueue: (SynthesisEvent | undefined)[];
  /**
   * Handle on the background task. `run` never resolves: it rejects with the
   * cancellation error after `cancel()`, or with the failure if synthesis or
   * the sentence stream throws.
   */
  task: {
    run: Promise<void>;
    cancel: () => void;
  };

  /** The error `task.run` is rejected with on cancellation, once cancelled. */
  #cancellation: Error | undefined;

  /**
   * @param tts - engine used to synthesize each sentence
   * @param sentenceStream - stream that turns pushed text into sentences
   */
  constructor(tts: TTS, sentenceStream: SentenceStream) {
    super();
    this.closed = false;
    this.tts = tts;
    this.sentenceStream = sentenceStream;
    this.eventQueue = [];

    let rejectRun: (reason: Error) => void = () => {};
    const run = new Promise<void>((_, reject) => {
      rejectRun = reject;
    });
    // Mark the promise as handled so a failure that happens before anyone
    // calls close() is not reported as an unhandled rejection. The rejection
    // is still observable by awaiting `task.run`.
    run.catch(() => {});

    this.task = {
      run,
      cancel: () => {
        this.closed = true;
        if (!this.#cancellation) {
          this.#cancellation = new Error('cancelled');
        }
        rejectRun(this.#cancellation);
      },
    };

    // Start the worker only after `this.task` exists: `run` executes
    // synchronously up to its first `await`.
    void this.run(rejectRun);
  }

  /**
   * Background worker: synthesizes sentences until the stream is closed.
   *
   * @param reject - called with the error if synthesis or the sentence stream
   *   throws; this method itself never rejects
   */
  async run(reject: (arg: Error) => void): Promise<void> {
    // Delay between polls of an exhausted sentence stream. A timer (macrotask)
    // is required here: re-polling immediately would yield only microtasks and
    // starve the event loop.
    const idlePollMs = 10;
    try {
      while (!this.closed) {
        for (const sentence of this.sentenceStream) {
          const audio = await this.tts.synthesize(sentence.text);
          if (this.closed) {
            // Cancelled while synthesizing: do not queue events after close().
            return;
          }
          this.eventQueue.push(new SynthesisEvent(SynthesisEventType.STARTED));
          this.eventQueue.push(new SynthesisEvent(SynthesisEventType.AUDIO, audio));
          this.eventQueue.push(new SynthesisEvent(SynthesisEventType.FINISHED));
        }
        if (this.closed) {
          break;
        }
        await new Promise<void>((resolve) => setTimeout(resolve, idlePollMs));
      }
    } catch (error) {
      reject(error instanceof Error ? error : new Error(String(error)));
    }
  }

  /**
   * Forwards text to the sentence stream.
   *
   * NOTE(review): the base class's `markSegmentEnd()` calls
   * `pushText(undefined)`, which this signature does not accept and which
   * would be forwarded as-is to the sentence stream — confirm how segment ends
   * are meant to be handled here.
   */
  pushText(token: string) {
    this.sentenceStream.pushText(token);
  }

  /** Flushes the underlying sentence stream. */
  async flush() {
    await this.sentenceStream.flush();
  }

  /**
   * Returns the next queued event. Reports `done` when the queue is empty or
   * when the `undefined` entry appended by `close()` is reached.
   */
  next(): IteratorResult<SynthesisEvent> {
    const event = this.eventQueue.shift();
    if (event) {
      return { done: false, value: event };
    } else {
      return { done: true, value: undefined };
    }
  }

  /**
   * Cancels the background task and appends the `undefined` end marker to the
   * event queue.
   *
   * @throws the original error if the background task had failed; the
   *   cancellation itself is expected and is not rethrown
   */
  async close(): Promise<void> {
    this.task.cancel();
    try {
      await this.task.run;
    } catch (error) {
      if (error !== this.#cancellation) {
        throw error;
      }
    } finally {
      this.eventQueue.push(undefined);
    }
  }
}
73+
74+
/**
 * Wraps a TTS so that `stream()` is available: streamed text is split into
 * sentences by a SentenceTokenizer and each sentence is synthesized with the
 * wrapped TTS. Always reports streaming as supported.
 */
export class StreamAdapter extends TTS {
  /** The wrapped engine that performs the actual synthesis. */
  tts: TTS;
  /** Tokenizer whose streams split pushed text into sentences. */
  tokenizer: SentenceTokenizer;

  /**
   * @param tts - engine to delegate synthesis to
   * @param tokenizer - tokenizer used to create a sentence stream per `stream()` call
   */
  constructor(tts: TTS, tokenizer: SentenceTokenizer) {
    super(true);
    this.tts = tts;
    this.tokenizer = tokenizer;
  }

  /** Delegates directly to the wrapped TTS. */
  synthesize(text: string): Promise<SynthesizedAudio> {
    return this.tts.synthesize(text);
  }

  /** Creates a new stream backed by a fresh sentence stream (no language given). */
  stream(): StreamAdapterWrapper {
    return new StreamAdapterWrapper(this.tts, this.tokenizer.stream(undefined));
  }
}

src/tts/tts.ts

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
// SPDX-FileCopyrightText: 2024 LiveKit, Inc.
2+
//
3+
// SPDX-License-Identifier: Apache-2.0
4+
5+
import { AudioFrame } from '@livekit/rtc-node';
6+
7+
/** Result of a synthesis call: an audio frame paired with text. */
export interface SynthesizedAudio {
  /** NOTE(review): presumably the text that `data` was synthesized from — confirm. */
  text: string;
  /** The audio frame. */
  data: AudioFrame;
}
11+
12+
/** Discriminates the kinds of SynthesisEvent. Numeric values are part of the API. */
export enum SynthesisEventType {
  /** Synthesis of a piece of text has started. */
  STARTED = 0,
  /** The event carries synthesized audio. */
  AUDIO = 1,
  /** Synthesis of a piece of text has finished. */
  FINISHED = 2,
}
17+
18+
/** An event emitted by a synthesis stream. */
export class SynthesisEvent {
  /** What kind of event this is. */
  type: SynthesisEventType;
  /** Audio attached to the event, or `undefined` when none was given. */
  audio: SynthesizedAudio | undefined;

  /**
   * @param type - the kind of event
   * @param audio - optional audio payload; defaults to `undefined`
   */
  constructor(type: SynthesisEventType, audio?: SynthesizedAudio) {
    this.type = type;
    this.audio = audio;
  }
}
27+
28+
/**
 * A streaming synthesis session: text goes in through {@link pushText} and
 * synthesis events come out through the synchronous iterator protocol.
 */
export abstract class SynthesizeStream implements IterableIterator<SynthesisEvent> {
  /**
   * Feeds text into the stream.
   *
   * @param token - the text to add, or `undefined` to mark the end of a
   *   segment (this is what {@link markSegmentEnd} sends)
   */
  abstract pushText(token: string | undefined): void;

  /** Marks the end of the current segment by pushing `undefined`. */
  markSegmentEnd() {
    this.pushText(undefined);
  }

  /**
   * Closes the stream.
   *
   * @param wait - NOTE(review): presumably whether to wait for pending work
   *   before closing — confirm against implementations
   */
  abstract close(wait: boolean): Promise<void>;

  /** Returns the next event, or a `done` result when none is available. */
  abstract next(): IteratorResult<SynthesisEvent>;

  /** The stream is its own iterator, so it can be used directly in `for...of`. */
  [Symbol.iterator](): SynthesizeStream {
    return this;
  }
}
42+
43+
/** Base class for text-to-speech engines. */
export abstract class TTS {
  readonly #streamingSupported: boolean;

  /**
   * @param streamingSupported - the value later reported by the
   *   `streamingSupported` getter
   */
  constructor(streamingSupported: boolean) {
    this.#streamingSupported = streamingSupported;
  }

  /** Synthesizes `text` in a single call. */
  abstract synthesize(text: string): Promise<SynthesizedAudio>;

  /** Opens a streaming synthesis session. */
  abstract stream(): SynthesizeStream;

  /** The flag passed to the constructor. */
  get streamingSupported(): boolean {
    return this.#streamingSupported;
  }
}

src/vad.ts

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ export enum VADEventType {
1010
END_OF_SPEECH = 3,
1111
}
1212

13-
interface VADEvent {
13+
export interface VADEvent {
1414
type: VADEventType;
1515
samplesIndex: number;
1616
duration: number;
@@ -33,11 +33,11 @@ export abstract class VAD {
3333
}): VADStream;
3434
}
3535

36-
export abstract class VADStream {
36+
export abstract class VADStream implements IterableIterator<VADEvent> {
3737
abstract pushFrame(frame: AudioFrame): void;
38-
abstract aclose(wait: boolean): Promise<void>;
39-
abstract anext(): Promise<VADEvent>;
40-
private aiter(): VADStream {
38+
abstract close(wait: boolean): Promise<void>;
39+
abstract next(): IteratorResult<VADEvent>;
40+
[Symbol.iterator](): VADStream {
4141
return this;
4242
}
4343
}

0 commit comments

Comments
 (0)