1 change: 1 addition & 0 deletions README.md
@@ -66,6 +66,7 @@ Currently, only the following plugins are supported:
| [@livekit/agents-plugin-openai](https://www.npmjs.com/package/@livekit/agents-plugin-openai) | LLM, TTS, STT |
| [@livekit/agents-plugin-google](https://www.npmjs.com/package/@livekit/agents-plugin-google) | LLM, TTS |
| [@livekit/agents-plugin-deepgram](https://www.npmjs.com/package/@livekit/agents-plugin-deepgram) | STT, TTS |
| [@livekit/agents-plugin-hathora](https://www.npmjs.com/package/@livekit/agents-plugin-hathora) | STT, TTS |
| [@livekit/agents-plugin-elevenlabs](https://www.npmjs.com/package/@livekit/agents-plugin-elevenlabs) | TTS |
| [@livekit/agents-plugin-cartesia](https://www.npmjs.com/package/@livekit/agents-plugin-cartesia) | TTS |
| [@livekit/agents-plugin-neuphonic](https://www.npmjs.com/package/@livekit/agents-plugin-neuphonic) | TTS |
17 changes: 17 additions & 0 deletions plugins/hathora/README.md
@@ -0,0 +1,17 @@
<!--
SPDX-FileCopyrightText: 2024 LiveKit, Inc.

SPDX-License-Identifier: Apache-2.0
-->
# Hathora plugin for LiveKit Agents

The Agents Framework is designed for building realtime, programmable
participants that run on servers. Use it to create conversational, multi-modal
voice agents that can see, hear, and understand.

This package contains the Hathora plugin, which provides speech synthesis (TTS) and speech recognition (STT).
Refer to the [documentation](https://docs.livekit.io/agents/overview/) for
information on how to use it, or browse the [API
reference](https://docs.livekit.io/agents-js/modules/plugins_agents_plugin_hathora.html).
See the [repository](https://github.com/livekit/agents-js) for more information
about the framework as a whole.
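
A minimal usage sketch; the model id below is a placeholder (pick a real one from the
[model catalog](https://models.hathora.dev)), and the API key is read from `HATHORA_API_KEY`
when not passed explicitly:

```ts
import { STT } from '@livekit/agents-plugin-hathora';

// Illustrative only: substitute a real model id from https://models.hathora.dev
const hathoraStt = new STT({
  model: '<stt-model-id>',
  language: 'en',
});
```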
20 changes: 20 additions & 0 deletions plugins/hathora/api-extractor.json
@@ -0,0 +1,20 @@
/**
* Config file for API Extractor. For more info, please visit: https://api-extractor.com
*/
{
"$schema": "https://developer.microsoft.com/json-schemas/api-extractor/v7/api-extractor.schema.json",

/**
* Optionally specifies another JSON config file that this file extends from. This provides a way for
* standard settings to be shared across multiple projects.
*
* If the path starts with "./" or "../", the path is resolved relative to the folder of the file that contains
* the "extends" field. Otherwise, the first path segment is interpreted as an NPM package name, and will be
* resolved using NodeJS require().
*
* SUPPORTED TOKENS: none
* DEFAULT VALUE: ""
*/
"extends": "../../api-extractor-shared.json",
"mainEntryPointFilePath": "./dist/index.d.ts"
}
53 changes: 53 additions & 0 deletions plugins/hathora/package.json
@@ -0,0 +1,53 @@
{
"name": "@livekit/agents-plugin-hathora",
"version": "0.1.0",
"description": "Hathora plugin for LiveKit Node Agents",
"main": "dist/index.js",
"require": "dist/index.cjs",
"types": "dist/index.d.ts",
"exports": {
"import": {
"types": "./dist/index.d.ts",
"default": "./dist/index.js"
},
"require": {
"types": "./dist/index.d.cts",
"default": "./dist/index.cjs"
}
},
"author": "LiveKit",
"type": "module",
"repository": "[email protected]:livekit/agents-js.git",
"license": "Apache-2.0",
"files": [
"dist",
"src",
"README.md"
],
"scripts": {
"build": "tsup --onSuccess \"pnpm build:types\"",
"build:types": "tsc --declaration --emitDeclarationOnly && node ../../scripts/copyDeclarationOutput.js",
"clean": "rm -rf dist",
"clean:build": "pnpm clean && pnpm build",
"lint": "eslint -f unix \"src/**/*.{ts,js}\"",
"api:check": "api-extractor run --typescript-compiler-folder ../../node_modules/typescript",
"api:update": "api-extractor run --local --typescript-compiler-folder ../../node_modules/typescript --verbose"
},
"devDependencies": {
"@livekit/agents": "workspace:*",
"@livekit/agents-plugin-openai": "workspace:*",
"@livekit/agents-plugins-test": "workspace:*",
"@livekit/rtc-node": "^0.13.12",
"@microsoft/api-extractor": "^7.35.0",
"@types/ws": "^8.5.10",
"tsup": "^8.3.5",
"typescript": "^5.0.0"
},
"dependencies": {
"ws": "^8.16.0"
},
"peerDependencies": {
"@livekit/agents": "workspace:*",
"@livekit/rtc-node": "^0.13.12"
}
}
20 changes: 20 additions & 0 deletions plugins/hathora/src/index.ts
@@ -0,0 +1,20 @@
// SPDX-FileCopyrightText: 2025 LiveKit, Inc.
//
// SPDX-License-Identifier: Apache-2.0
import { Plugin } from '@livekit/agents';

export * from './tts.js';
export * from './stt.js';
export * from './utils.js';

class HathoraPlugin extends Plugin {
constructor() {
super({
title: 'hathora',
version: '0.1.0',
package: '@livekit/agents-plugin-hathora',
});
}
}

Plugin.registerPlugin(new HathoraPlugin());
145 changes: 145 additions & 0 deletions plugins/hathora/src/stt.ts
@@ -0,0 +1,145 @@
import { type AudioBuffer, stt } from '@livekit/agents';
import { combineAudioFrames, type AudioFrame } from '@livekit/rtc-node';
import type { ConfigOption } from './utils.js';

const API_URL = 'https://api.models.hathora.dev/inference/v1/stt';
const AUTHORIZATION_HEADER = 'Authorization';

/**
* @interface STTOptions - Options for configuring the Hathora STT service.
* @property model - Model to use; find available models [here](https://models.hathora.dev).
* @property [language] - Language code (if supported by model).
* @property [modelConfig] - Some models support additional config, refer to [docs](https://models.hathora.dev)
* for each model to see what is supported.
* @property [baseURL] - Base API URL for the Hathora STT service.
* @property [apiKey] - API key for authentication with the Hathora service;
* provision one [here](https://models.hathora.dev/tokens).
*/
export interface STTOptions {
/** Model to use; find available models [here](https://models.hathora.dev).*/
model: string;
/** Language code (if supported by model). */
language?: string;
/** Some models support additional config, refer to [docs](https://models.hathora.dev)
* for each model to see what is supported. */
modelConfig?: ConfigOption[];
/** Base API URL for the Hathora STT service. */
baseURL?: string;
/** API key for authentication with the Hathora service;
* provision one [here](https://models.hathora.dev/tokens). */
apiKey?: string;
}

const defaultSTTOptions: Partial<STTOptions> = {
baseURL: API_URL,
apiKey: process.env.HATHORA_API_KEY,
};

/**
* This service supports several different speech-to-text models hosted by Hathora.
*
* [Documentation](https://models.hathora.dev)
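*
* @example
* A minimal, illustrative sketch: the model id is a placeholder (pick one from the
* catalog above), and `recognize()` is assumed to be the public entry point exposed
* by the `stt.STT` base class.
* ```ts
* // `frames` is an AudioBuffer captured elsewhere (e.g. from a LiveKit audio track).
* const hathoraStt = new STT({ model: '<stt-model-id>', language: 'en' });
* const event = await hathoraStt.recognize(frames);
* console.log(event.alternatives[0].text);
* ```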
*/
export class STT extends stt.STT {
label = 'hathora.STT';
#opts: STTOptions;
#url: URL;

constructor(opts: STTOptions) {
super({ streaming: false, interimResults: false });

this.#opts = {
...defaultSTTOptions,
...opts,
};

// An explicitly-undefined baseURL should still fall back to the default endpoint.
if (this.#opts.baseURL === undefined) {
this.#opts.baseURL = API_URL;
}

// remove trailing slash from baseURL
const baseURL = this.#opts.baseURL!.replace(/\/$/, '');

this.#url = new URL(baseURL);

if (this.#opts.apiKey === undefined) {
throw new Error('Hathora API key is required, either as an argument or via the $HATHORA_API_KEY environment variable');
}
}

#createWav(frame: AudioFrame): Buffer {
// Wrap the raw PCM samples in a minimal 44-byte RIFF/WAVE header so the API
// receives a self-describing audio file.
const bitsPerSample = 16;
const byteRate = (frame.sampleRate * frame.channels * bitsPerSample) / 8;
const blockAlign = (frame.channels * bitsPerSample) / 8;

const header = Buffer.alloc(44);
header.write('RIFF', 0);
header.writeUInt32LE(36 + frame.data.byteLength, 4); // total size minus the first 8 bytes
header.write('WAVE', 8);
header.write('fmt ', 12);
header.writeUInt32LE(16, 16); // fmt chunk size
header.writeUInt16LE(1, 20); // audio format: 1 = PCM
header.writeUInt16LE(frame.channels, 22);
header.writeUInt32LE(frame.sampleRate, 24);
header.writeUInt32LE(byteRate, 28);
header.writeUInt16LE(blockAlign, 32);
header.writeUInt16LE(16, 34); // bits per sample
header.write('data', 36);
header.writeUInt32LE(frame.data.byteLength, 40);
// Slice by byteOffset/byteLength in case frame.data is a view into a larger buffer.
return Buffer.concat([
header,
Buffer.from(frame.data.buffer, frame.data.byteOffset, frame.data.byteLength),
]);
}

async _recognize(buffer: AudioBuffer, abortSignal?: AbortSignal): Promise<stt.SpeechEvent> {
const headers: HeadersInit = {
[AUTHORIZATION_HEADER]: `Bearer ${this.#opts.apiKey!}`,
'Content-Type': 'application/json',
};

const body: Record<string, unknown> = {
model: this.#opts.model,
};

if (this.#opts.language) {
body.language = this.#opts.language;
}

if (this.#opts.modelConfig) {
body.model_config = this.#opts.modelConfig;
}

body.audio = this.#createWav(combineAudioFrames(buffer)).toString('base64');

const response = await fetch(
this.#url,
{
method: 'POST',
headers,
body: JSON.stringify(body),
signal: abortSignal,
},
);

if (!response.ok) {
throw new Error(`STT request failed: ${response.status} ${response.statusText}`);
}

const result = await response.json();

return {
type: stt.SpeechEventType.FINAL_TRANSCRIPT,
alternatives: [
{
text: result.text || '',
language: this.#opts.language || '',
startTime: 0,
endTime: 0,
confidence: 0,
},
],
};
}

stream(): stt.SpeechStream {
throw new Error('Streaming is not supported on Hathora STT');
}
}
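
Since `stream()` throws, this STT is batch-only. In the Agents framework a non-streaming STT is normally paired with voice activity detection so it can still consume live audio. The sketch below is an assumption-heavy illustration: it presumes `@livekit/agents` exposes an `stt.StreamAdapter` and that the Silero VAD plugin is installed; check the framework docs for the exact API.

```ts
import { stt } from '@livekit/agents';
import * as silero from '@livekit/agents-plugin-silero';
import { STT } from '@livekit/agents-plugin-hathora';

// Hypothetical wiring: VAD segments the live audio stream, and each detected
// utterance is transcribed through the non-streaming recognize() path.
const vad = await silero.VAD.load();
const hathoraStt = new STT({ model: '<stt-model-id>' }); // placeholder model id
const streamingStt = new stt.StreamAdapter(hathoraStt, vad);
```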