1 change: 1 addition & 0 deletions README.md
@@ -66,6 +66,7 @@ Currently, only the following plugins are supported:
| [@livekit/agents-plugin-openai](https://www.npmjs.com/package/@livekit/agents-plugin-openai) | LLM, TTS, STT |
| [@livekit/agents-plugin-google](https://www.npmjs.com/package/@livekit/agents-plugin-google) | LLM, TTS |
| [@livekit/agents-plugin-deepgram](https://www.npmjs.com/package/@livekit/agents-plugin-deepgram) | STT, TTS |
| [@livekit/agents-plugin-hathora](https://www.npmjs.com/package/@livekit/agents-plugin-hathora) | STT, TTS |
| [@livekit/agents-plugin-elevenlabs](https://www.npmjs.com/package/@livekit/agents-plugin-elevenlabs) | TTS |
| [@livekit/agents-plugin-cartesia](https://www.npmjs.com/package/@livekit/agents-plugin-cartesia) | TTS |
| [@livekit/agents-plugin-neuphonic](https://www.npmjs.com/package/@livekit/agents-plugin-neuphonic) | TTS |
17 changes: 17 additions & 0 deletions plugins/hathora/README.md
@@ -0,0 +1,17 @@
<!--
SPDX-FileCopyrightText: 2024 LiveKit, Inc.

SPDX-License-Identifier: Apache-2.0
-->
# Hathora plugin for LiveKit Agents

The Agents Framework is designed for building realtime, programmable
participants that run on servers. Use it to create conversational, multi-modal
voice agents that can see, hear, and understand.

This package contains the Hathora plugin, which provides speech synthesis (TTS) and speech recognition (STT).
Refer to the [documentation](https://docs.livekit.io/agents/overview/) for
information on how to use it, or browse the [API
reference](https://docs.livekit.io/agents-js/modules/plugins_agents_plugin_hathora.html).
See the [repository](https://github.com/livekit/agents-js) for more information
about the framework as a whole.
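
A minimal usage sketch; the model id below is a placeholder (pick a real one from the
[model catalog](https://models.hathora.dev)), and the API key is read from `HATHORA_API_KEY`
when not passed explicitly:

```ts
import { STT } from '@livekit/agents-plugin-hathora';

// Illustrative only: substitute a real model id from https://models.hathora.dev
const hathoraStt = new STT({
  model: '<stt-model-id>',
  language: 'en',
});
```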
20 changes: 20 additions & 0 deletions plugins/hathora/api-extractor.json
@@ -0,0 +1,20 @@
/**
* Config file for API Extractor. For more info, please visit: https://api-extractor.com
*/
{
"$schema": "https://developer.microsoft.com/json-schemas/api-extractor/v7/api-extractor.schema.json",

/**
* Optionally specifies another JSON config file that this file extends from. This provides a way for
* standard settings to be shared across multiple projects.
*
* If the path starts with "./" or "../", the path is resolved relative to the folder of the file that contains
* the "extends" field. Otherwise, the first path segment is interpreted as an NPM package name, and will be
* resolved using NodeJS require().
*
* SUPPORTED TOKENS: none
* DEFAULT VALUE: ""
*/
"extends": "../../api-extractor-shared.json",
"mainEntryPointFilePath": "./dist/index.d.ts"
}
53 changes: 53 additions & 0 deletions plugins/hathora/package.json
@@ -0,0 +1,53 @@
{
"name": "@livekit/agents-plugin-hathora",
"version": "0.1.0",
"description": "Hathora plugin for LiveKit Node Agents",
"main": "dist/index.js",
"require": "dist/index.cjs",
"types": "dist/index.d.ts",
"exports": {
"import": {
"types": "./dist/index.d.ts",
"default": "./dist/index.js"
},
"require": {
"types": "./dist/index.d.cts",
"default": "./dist/index.cjs"
}
},
"author": "LiveKit",
"type": "module",
"repository": "[email protected]:livekit/agents-js.git",
"license": "Apache-2.0",
"files": [
"dist",
"src",
"README.md"
],
"scripts": {
"build": "tsup --onSuccess \"pnpm build:types\"",
"build:types": "tsc --declaration --emitDeclarationOnly && node ../../scripts/copyDeclarationOutput.js",
"clean": "rm -rf dist",
"clean:build": "pnpm clean && pnpm build",
"lint": "eslint -f unix \"src/**/*.{ts,js}\"",
"api:check": "api-extractor run --typescript-compiler-folder ../../node_modules/typescript",
"api:update": "api-extractor run --local --typescript-compiler-folder ../../node_modules/typescript --verbose"
},
"devDependencies": {
"@livekit/agents": "workspace:*",
"@livekit/agents-plugin-openai": "workspace:*",
"@livekit/agents-plugins-test": "workspace:*",
"@livekit/rtc-node": "^0.13.12",
"@microsoft/api-extractor": "^7.35.0",
"@types/ws": "^8.5.10",
"tsup": "^8.3.5",
"typescript": "^5.0.0"
},
"dependencies": {
"ws": "^8.16.0"
},
"peerDependencies": {
"@livekit/agents": "workspace:*",
"@livekit/rtc-node": "^0.13.12"
}
}
20 changes: 20 additions & 0 deletions plugins/hathora/src/index.ts
@@ -0,0 +1,20 @@
// SPDX-FileCopyrightText: 2025 LiveKit, Inc.
//
// SPDX-License-Identifier: Apache-2.0
import { Plugin } from '@livekit/agents';

export * from './tts.js';
export * from './stt.js';
export * from './utils.js';

class HathoraPlugin extends Plugin {
constructor() {
super({
title: 'hathora',
version: '0.1.0',
package: '@livekit/agents-plugin-hathora',
});
}
}

Plugin.registerPlugin(new HathoraPlugin());
145 changes: 145 additions & 0 deletions plugins/hathora/src/stt.ts
@@ -0,0 +1,145 @@
import { type AudioBuffer, stt } from '@livekit/agents';
import { combineAudioFrames, type AudioFrame } from '@livekit/rtc-node';
import type { ConfigOption } from './utils.js';

const API_URL = 'https://api.models.hathora.dev/inference/v1/stt';
const AUTHORIZATION_HEADER = 'Authorization';

/**
* @interface STTOptions - Options for configuring the Hathora STT service.
* @property model - Model to use; find available models [here](https://models.hathora.dev).
* @property [language] - Language code (if supported by model).
* @property [modelConfig] - Some models support additional config, refer to [docs](https://models.hathora.dev)
* for each model to see what is supported.
* @property [baseURL] - Base API URL for the Hathora STT service.
* @property [apiKey] - API key for authentication with the Hathora service;
* provision one [here](https://models.hathora.dev/tokens).
*/
export interface STTOptions {
/** Model to use; find available models [here](https://models.hathora.dev).*/
model: string;
/** Language code (if supported by model). */
language?: string;
/** Some models support additional config, refer to [docs](https://models.hathora.dev)
* for each model to see what is supported. */
modelConfig?: ConfigOption[];
/** Base API URL for the Hathora STT service. */
baseURL?: string;
/** API key for authentication with the Hathora service;
* provision one [here](https://models.hathora.dev/tokens). */
apiKey?: string;
}

const defaultSTTOptions: Partial<STTOptions> = {
baseURL: API_URL,
apiKey: process.env.HATHORA_API_KEY,
};

/**
* This service supports several different speech-to-text models hosted by Hathora.
*
* [Documentation](https://models.hathora.dev)
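*
* @example
* A minimal, illustrative sketch: the model id is a placeholder (pick one from the
* catalog above), and `recognize()` is assumed to be the public entry point exposed
* by the `stt.STT` base class.
* ```ts
* // `frames` is an AudioBuffer captured elsewhere (e.g. from a LiveKit audio track).
* const hathoraStt = new STT({ model: '<stt-model-id>', language: 'en' });
* const event = await hathoraStt.recognize(frames);
* console.log(event.alternatives[0].text);
* ```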
*/
export class STT extends stt.STT {
label = 'hathora.STT';
#opts: STTOptions;
#url: URL;

constructor(opts: STTOptions) {
super({ streaming: false, interimResults: false });

this.#opts = {
...defaultSTTOptions,
...opts,
};

// An explicitly-undefined baseURL should still fall back to the default endpoint.
if (this.#opts.baseURL === undefined) {
this.#opts.baseURL = API_URL;
}

// remove trailing slash from baseURL
const baseURL = this.#opts.baseURL!.replace(/\/$/, '');

this.#url = new URL(baseURL);

if (this.#opts.apiKey === undefined) {
throw new Error('Hathora API key is required, either as an argument or via the $HATHORA_API_KEY environment variable');
}
}

#createWav(frame: AudioFrame): Buffer {
// Wrap the raw PCM samples in a minimal 44-byte RIFF/WAVE header so the API
// receives a self-describing audio file.
const bitsPerSample = 16;
const byteRate = (frame.sampleRate * frame.channels * bitsPerSample) / 8;
const blockAlign = (frame.channels * bitsPerSample) / 8;

const header = Buffer.alloc(44);
header.write('RIFF', 0);
header.writeUInt32LE(36 + frame.data.byteLength, 4); // total size minus the first 8 bytes
header.write('WAVE', 8);
header.write('fmt ', 12);
header.writeUInt32LE(16, 16); // fmt chunk size
header.writeUInt16LE(1, 20); // audio format: 1 = PCM
header.writeUInt16LE(frame.channels, 22);
header.writeUInt32LE(frame.sampleRate, 24);
header.writeUInt32LE(byteRate, 28);
header.writeUInt16LE(blockAlign, 32);
header.writeUInt16LE(16, 34); // bits per sample
header.write('data', 36);
header.writeUInt32LE(frame.data.byteLength, 40);
// Slice by byteOffset/byteLength in case frame.data is a view into a larger buffer.
return Buffer.concat([
header,
Buffer.from(frame.data.buffer, frame.data.byteOffset, frame.data.byteLength),
]);
}

async _recognize(buffer: AudioBuffer, abortSignal?: AbortSignal): Promise<stt.SpeechEvent> {
const headers: HeadersInit = {
[AUTHORIZATION_HEADER]: `Bearer ${this.#opts.apiKey!}`,
'Content-Type': 'application/json',
};

const body: Record<string, unknown> = {
model: this.#opts.model,
};

if (this.#opts.language) {
body.language = this.#opts.language;
}

if (this.#opts.modelConfig) {
body.model_config = this.#opts.modelConfig;
}

body.audio = this.#createWav(combineAudioFrames(buffer)).toString('base64');

const response = await fetch(
this.#url,
{
method: 'POST',
headers,
body: JSON.stringify(body),
signal: abortSignal,
},
);

if (!response.ok) {
throw new Error(`STT request failed: ${response.status} ${response.statusText}`);
}

const result = await response.json();

return {
type: stt.SpeechEventType.FINAL_TRANSCRIPT,
alternatives: [
{
text: result.text || '',
language: this.#opts.language || '',
startTime: 0,
endTime: 0,
confidence: 0,
},
],
};
}

stream(): stt.SpeechStream {
throw new Error('Streaming is not supported on Hathora STT');
}
}
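
Since `stream()` throws, this STT is batch-only. In the Agents framework a non-streaming STT is normally paired with voice activity detection so it can still consume live audio. The sketch below is an assumption-heavy illustration: it presumes `@livekit/agents` exposes an `stt.StreamAdapter` and that the Silero VAD plugin is installed; check the framework docs for the exact API.

```ts
import { stt } from '@livekit/agents';
import * as silero from '@livekit/agents-plugin-silero';
import { STT } from '@livekit/agents-plugin-hathora';

// Hypothetical wiring: VAD segments the live audio stream, and each detected
// utterance is transcribed through the non-streaming recognize() path.
const vad = await silero.VAD.load();
const hathoraStt = new STT({ model: '<stt-model-id>' }); // placeholder model id
const streamingStt = new stt.StreamAdapter(hathoraStt, vad);
```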