Skip to content

Commit 3e1f895

Browse files
feat(voice-server): add TTS provider abstraction with fallback order
Adds a pluggable TTS provider system that allows multiple backends with configurable fallback order. Includes cross-platform audio playback support. Providers: - ElevenLabs (cloud) - existing functionality extracted to provider class - Piper (local) - free offline TTS using neural voice models - MacOS (local) - native macOS say command Cross-platform audio: - macOS: afplay - Linux: aplay - WSL: powershell Media.SoundPlayer Configuration via config.json with fallback order: {"providers": ["piper", "elevenlabs", "macos-say"]} First available provider is used automatically. Falls back to direct ElevenLabs API if no provider configured (backwards compatible). 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 <[email protected]>
1 parent b70e808 commit 3e1f895

File tree

6 files changed

+248
-16
lines changed

6 files changed

+248
-16
lines changed
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
{
2+
"providers": ["piper", "elevenlabs", "macos-say"]
3+
}
Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
import { spawn } from 'child_process';
2+
import type { TTSProvider } from './index';
3+
4+
const ELEVENLABS_API_KEY = process.env.ELEVENLABS_API_KEY;
5+
const DEFAULT_VOICE_ID = process.env.ELEVENLABS_VOICE_ID || 's3TPKV1kjDlVtZbl4Ksh';
6+
const DEFAULT_MODEL = process.env.ELEVENLABS_MODEL || 'eleven_multilingual_v2';
7+
8+
/**
 * Cloud TTS provider that requests MP3 audio from the ElevenLabs
 * text-to-speech endpoint and plays it with `/usr/bin/afplay`.
 *
 * The API key, default voice and model come from the module-level constants
 * read from the environment when this module is loaded.
 *
 * NOTE(review): playback is hard-wired to `afplay`, so `speak` presumably only
 * works where that binary exists (macOS) even though `isAvailable` checks
 * nothing but the API key — confirm whether that is intended.
 */
export class ElevenLabs implements TTSProvider {
  readonly name = 'elevenlabs';

  /** Returns true when an ElevenLabs API key was present in the environment. */
  isAvailable(): boolean {
    return !!ELEVENLABS_API_KEY;
  }

  /**
   * Synthesizes `text` and plays it, resolving once playback has finished.
   *
   * @param text - Text to speak.
   * @param voiceId - ElevenLabs voice ID; falls back to `DEFAULT_VOICE_ID`.
   * @throws Error when the API key is missing, the API responds with a
   *   non-2xx status, or the audio player fails.
   */
  async speak(text: string, voiceId?: string): Promise<void> {
    if (!ELEVENLABS_API_KEY) {
      throw new Error('ElevenLabs API key not configured');
    }

    const voice = voiceId || DEFAULT_VOICE_ID;
    // Encode the ID so a caller-supplied value cannot alter the request path.
    const url = `https://api.elevenlabs.io/v1/text-to-speech/${encodeURIComponent(voice)}`;

    const response = await fetch(url, {
      method: 'POST',
      headers: {
        'Accept': 'audio/mpeg',
        'Content-Type': 'application/json',
        'xi-api-key': ELEVENLABS_API_KEY,
      },
      body: JSON.stringify({
        text: text,
        model_id: DEFAULT_MODEL,
        voice_settings: {
          stability: 0.5,
          similarity_boost: 0.5,
        },
      }),
    });

    if (!response.ok) {
      const errorText = await response.text();
      if (errorText.includes('model') || response.status === 422) {
        // Keep the model hint, but also surface the real status and body: the
        // check above is a heuristic and can match unrelated errors.
        throw new Error(`ElevenLabs API error: Invalid model "${DEFAULT_MODEL}". Update ELEVENLABS_MODEL in ~/.env. See https://elevenlabs.io/docs/models (${response.status} - ${errorText})`);
      }
      throw new Error(`ElevenLabs API error: ${response.status} - ${errorText}`);
    }

    const audioBuffer = await response.arrayBuffer();
    // Random suffix avoids clobbering when two requests land in the same millisecond.
    const tempFile = `/tmp/voice-${Date.now()}-${Math.random().toString(36).slice(2)}.mp3`;
    await Bun.write(tempFile, audioBuffer);

    return new Promise<void>((resolve, reject) => {
      let settled = false;

      // Single exit path: removes the temp file and settles the promise once,
      // whether the player exited or failed to start.
      const finish = (error?: Error): void => {
        if (settled) return;
        settled = true;
        // Best-effort cleanup; a failure to spawn rm must not crash the server.
        spawn('/bin/rm', ['-f', tempFile]).on('error', () => {});
        if (error) reject(error);
        else resolve();
      };

      const proc = spawn('/usr/bin/afplay', [tempFile]);
      proc.on('error', (error) => {
        console.error('Error playing audio:', error);
        finish(error);
      });
      proc.on('exit', (code) => {
        finish(code === 0 ? undefined : new Error(`afplay exited with code ${code}`));
      });
    });
  }
}
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
import { spawn } from 'child_process';
2+
import { platform } from 'os';
3+
import type { TTSProvider } from './index';
4+
5+
/**
 * Local TTS provider that shells out to the macOS `/usr/bin/say` command.
 */
export class MacOSSay implements TTSProvider {
  readonly name = 'macos-say';

  /** Returns true only when running on macOS (`os.platform() === 'darwin'`). */
  isAvailable(): boolean {
    return platform() === 'darwin';
  }

  /**
   * Speaks `text`, resolving when `say` exits successfully.
   *
   * If `say` exits non-zero with a caller-supplied voice (for example an ID
   * that belongs to a different provider), the text is spoken once more with
   * the built-in default voice instead of failing outright.
   *
   * @param text - Text to speak.
   * @param voiceId - Name of a `say` voice; defaults to `Samantha`.
   * @throws Error when `say` cannot be started or exits non-zero.
   */
  async speak(text: string, voiceId?: string): Promise<void> {
    const fallbackVoice = 'Samantha';
    const requestedVoice = voiceId || fallbackVoice;

    let code = await this.run(requestedVoice, text);
    // Retry only on a real non-zero exit status (not a signal, where code is
    // null) and only when a different voice is left to try.
    if (typeof code === 'number' && code !== 0 && requestedVoice !== fallbackVoice) {
      code = await this.run(fallbackVoice, text);
    }
    if (code !== 0) {
      throw new Error(`say exited with code ${code}`);
    }
  }

  /**
   * Runs `say -v <voice>` with the text supplied on stdin and resolves with
   * the exit code. Rejects only when the process cannot be spawned.
   */
  private run(voice: string, text: string): Promise<number | null> {
    return new Promise((resolve, reject) => {
      // The text goes through stdin rather than argv so a message that begins
      // with '-' cannot be parsed as a command-line option.
      const proc = spawn('/usr/bin/say', ['-v', voice]);
      proc.on('error', reject);
      proc.on('exit', (code) => resolve(code));
      // If say exits before reading its input the write fails; the exit code
      // already reports that, so the stream error itself is ignored.
      proc.stdin.on('error', () => {});
      proc.stdin.end(text);
    });
  }
}
Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,108 @@
1+
import { spawn, spawnSync } from 'child_process';
2+
import { existsSync, readFileSync } from 'fs';
3+
import { join } from 'path';
4+
import { platform, release } from 'os';
5+
import type { TTSProvider } from './index';
6+
7+
const IS_WSL = platform() === 'linux' && release().toLowerCase().includes('microsoft');
8+
9+
interface VoiceConfig {
10+
model: string;
11+
speaker: number;
12+
}
13+
14+
/**
 * Local TTS provider that runs a Piper binary to synthesize raw PCM, wraps it
 * in a WAV header, and plays it with a platform-specific player
 * (PowerShell `Media.SoundPlayer` on WSL, `afplay` on macOS, `aplay` otherwise).
 *
 * Binary location, models directory and named voices are read from an optional
 * `voices.json` in the base directory.
 */
export class Piper implements TTSProvider {
  readonly name = 'piper';
  private baseDir: string;
  private binary: string;
  private modelsDir: string;
  private voices: Record<string, VoiceConfig> = {};

  /**
   * @param baseDir - Directory containing `voices.json`, the Piper binary and
   *   the models; defaults to the parent of this module's directory.
   */
  constructor(baseDir?: string) {
    this.baseDir = baseDir || join(import.meta.dir, '..');
    const configPath = join(this.baseDir, 'voices.json');

    let config: {
      piper?: { binary?: string; models_dir?: string };
      voices?: Record<string, VoiceConfig>;
    } = {};
    if (existsSync(configPath)) {
      try {
        config = JSON.parse(readFileSync(configPath, 'utf-8')) ?? {};
      } catch (error) {
        // A malformed config falls back to the defaults instead of throwing
        // out of the constructor.
        console.error(`Piper: ignoring unreadable ${configPath}:`, error);
      }
    }

    this.binary = join(this.baseDir, config.piper?.binary || 'piper-bin/piper/piper');
    this.modelsDir = join(this.baseDir, config.piper?.models_dir || 'piper-voices');
    this.voices = config.voices || {};
  }

  /** Returns true when the Piper binary exists on disk. */
  isAvailable(): boolean {
    return existsSync(this.binary);
  }

  /**
   * Synthesizes `text` with Piper and plays the result.
   *
   * @param text - Text to speak.
   * @param voiceId - Key into the `voices` map from `voices.json`. Unknown or
   *   missing IDs use the configured `default` voice, then a built-in default.
   * @throws Error when the model file is missing, Piper fails, or the audio
   *   player fails.
   */
  async speak(text: string, voiceId?: string): Promise<void> {
    const voice = this.resolveVoice(voiceId);
    const modelPath = join(this.modelsDir, `${voice.model}.onnx`);

    if (!existsSync(modelPath)) {
      throw new Error(`Piper model not found: ${modelPath}`);
    }

    const pcm = await this.synthesize(text, modelPath, voice.speaker ?? 0);
    const wavBuffer = this.pcmToWav(pcm, this.readSampleRate(modelPath));
    await this.playAudio(wavBuffer);
  }

  /** Picks the voice for `voiceId`, falling back to `default`, then a built-in. */
  private resolveVoice(voiceId?: string): VoiceConfig {
    // Own-property lookup so IDs such as "constructor" never resolve to
    // inherited Object members.
    const lookup = (key: string): VoiceConfig | undefined =>
      Object.prototype.hasOwnProperty.call(this.voices, key) ? this.voices[key] : undefined;

    return (
      (voiceId ? lookup(voiceId) : undefined) ??
      lookup('default') ??
      { model: 'en_US-libritts_r-medium', speaker: 0 }
    );
  }

  /** Runs Piper asynchronously, feeding `text` on stdin; resolves with raw PCM from stdout. */
  private synthesize(text: string, modelPath: string, speaker: number): Promise<Buffer> {
    return new Promise((resolve, reject) => {
      const proc = spawn(this.binary, [
        '--model', modelPath,
        '--speaker', String(speaker),
        '--output-raw',
      ]);
      const stdoutChunks: Buffer[] = [];
      const stderrChunks: Buffer[] = [];

      proc.stdout.on('data', (chunk: Buffer) => stdoutChunks.push(chunk));
      proc.stderr.on('data', (chunk: Buffer) => stderrChunks.push(chunk));
      proc.on('error', (error) => reject(new Error(`Piper error: ${error.message}`)));
      // 'close' fires after the stdio streams have ended, so all output is collected.
      proc.on('close', (code) => {
        if (code === 0) resolve(Buffer.concat(stdoutChunks));
        else reject(new Error(`Piper failed: ${Buffer.concat(stderrChunks).toString()}`));
      });

      // If Piper exits before reading its input the write fails; the exit
      // code already reports that, so the stream error itself is ignored.
      proc.stdin.on('error', () => {});
      proc.stdin.end(text);
    });
  }

  /**
   * Reads `audio.sample_rate` from the `<model>.onnx.json` file next to the
   * model, returning 22050 when the file or field is absent or unreadable.
   */
  private readSampleRate(modelPath: string): number {
    const metaPath = `${modelPath}.json`;
    try {
      if (existsSync(metaPath)) {
        const rate: unknown = JSON.parse(readFileSync(metaPath, 'utf-8'))?.audio?.sample_rate;
        if (typeof rate === 'number' && Number.isInteger(rate) && rate > 0) return rate;
      }
    } catch {
      // Fall through to the default below.
    }
    return 22050;
  }

  /**
   * Prepends a 44-byte RIFF/WAVE header describing mono 16-bit PCM.
   *
   * @param pcm - Raw sample data.
   * @param sampleRate - Samples per second written into the header.
   */
  private pcmToWav(pcm: Buffer, sampleRate = 22050): Buffer {
    const channels = 1;
    const bitsPerSample = 16;
    const blockAlign = (channels * bitsPerSample) / 8;

    const header = Buffer.alloc(44);
    header.write('RIFF', 0);
    header.writeUInt32LE(36 + pcm.length, 4); // RIFF chunk size = file size - 8
    header.write('WAVE', 8);
    header.write('fmt ', 12);
    header.writeUInt32LE(16, 16); // fmt chunk size
    header.writeUInt16LE(1, 20); // audio format 1 = PCM
    header.writeUInt16LE(channels, 22);
    header.writeUInt32LE(sampleRate, 24);
    header.writeUInt32LE(sampleRate * blockAlign, 28); // byte rate
    header.writeUInt16LE(blockAlign, 32);
    header.writeUInt16LE(bitsPerSample, 34);
    header.write('data', 36);
    header.writeUInt32LE(pcm.length, 40);
    return Buffer.concat([header, pcm]);
  }

  /** Writes `wav` to a temp file and plays it with the player for this platform. */
  private async playAudio(wav: Buffer): Promise<void> {
    // One stamp per call so every path derived from it names the same file.
    const stamp = `${Date.now()}-${Math.random().toString(36).slice(2)}`;

    if (IS_WSL) {
      const fileName = `piper_${stamp}.wav`;
      const winPath = `C:\\Users\\Public\\${fileName}`;
      const wslPath = `/mnt/c/Users/Public/${fileName}`;
      await Bun.write(wslPath, wav);

      return this.runPlayer(
        'powershell.exe',
        ['-NoProfile', '-Command',
          `(New-Object Media.SoundPlayer '${winPath}').PlaySync(); Remove-Item '${winPath}'`],
        wslPath,
        'powershell',
      );
    }

    const tempFile = `/tmp/voice-${stamp}.wav`;
    await Bun.write(tempFile, wav);

    if (platform() === 'darwin') {
      return this.runPlayer('/usr/bin/afplay', [tempFile], tempFile, 'afplay');
    }
    return this.runPlayer('aplay', ['-q', tempFile], tempFile, 'aplay');
  }

  /**
   * Runs a player process and settles exactly once, removing `tempFile`
   * whether the player exited or failed to start.
   */
  private runPlayer(command: string, args: string[], tempFile: string, label: string): Promise<void> {
    return new Promise((resolve, reject) => {
      let settled = false;
      const finish = (error?: Error): void => {
        if (settled) return;
        settled = true;
        // Best-effort cleanup; a failure to spawn rm must not crash the server.
        spawn('/bin/rm', ['-f', tempFile]).on('error', () => {});
        if (error) reject(error);
        else resolve();
      };

      const proc = spawn(command, args);
      proc.on('error', finish);
      proc.on('exit', (code) => {
        finish(code === 0 ? undefined : new Error(`${label} exited ${code}`));
      });
    });
  }
}
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
/**
 * Contract shared by every text-to-speech backend that the voice server can
 * select from its configured fallback order.
 */
export interface TTSProvider {
2+
/** Short identifier for the backend; used as its key in the provider list and in log output. */
readonly name: string;
3+
/** Synchronous check of whether this backend can be used in the current environment. */
isAvailable(): boolean;
4+
/**
 * Speaks `text` using the backend.
 *
 * @param text - Text to speak.
 * @param voiceId - Optional voice selector; each backend interprets it in its own way.
 * @returns A promise that settles when the backend is done; the visible
 *   implementations resolve after audio playback exits and reject on failure.
 */
speak(text: string, voiceId?: string): Promise<void>;
5+
}
6+
7+
export { ElevenLabs } from './ElevenLabs';
8+
export { Piper } from './Piper';
9+
export { MacOSSay } from './MacOSSay';

.claude/voice-server/server.ts

Lines changed: 40 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,8 @@ import { serve } from "bun";
77
import { spawn } from "child_process";
88
import { homedir } from "os";
99
import { join } from "path";
10-
import { existsSync } from "fs";
10+
import { existsSync, readFileSync } from "fs";
11+
import { ElevenLabs, Piper, MacOSSay, type TTSProvider } from "./providers";
1112

1213
// Load .env from user home directory
1314
const envPath = join(homedir(), '.env');
@@ -24,9 +25,32 @@ if (existsSync(envPath)) {
2425
const PORT = parseInt(process.env.PORT || "8888");
2526
const ELEVENLABS_API_KEY = process.env.ELEVENLABS_API_KEY;
2627

27-
if (!ELEVENLABS_API_KEY) {
28-
console.error('⚠️ ELEVENLABS_API_KEY not found in ~/.env');
29-
console.error('Add: ELEVENLABS_API_KEY=your_key_here');
28+
// Load TTS provider from config
29+
const PROVIDERS: Record<string, () => TTSProvider> = {
30+
'elevenlabs': () => new ElevenLabs(),
31+
'piper': () => new Piper(),
32+
'macos-say': () => new MacOSSay(),
33+
};
34+
35+
let provider: TTSProvider | null = null;
36+
const configPath = join(import.meta.dir, 'config.json');
37+
if (existsSync(configPath)) {
38+
const config = JSON.parse(readFileSync(configPath, 'utf-8'));
39+
for (const name of config.providers || []) {
40+
const create = PROVIDERS[name];
41+
if (create) {
42+
const p = create();
43+
if (p.isAvailable()) {
44+
provider = p;
45+
break;
46+
}
47+
}
48+
}
49+
}
50+
51+
if (!provider && !ELEVENLABS_API_KEY) {
52+
console.error('⚠️ No TTS provider available');
53+
console.error('Configure providers in config.json or add ELEVENLABS_API_KEY to ~/.env');
3054
}
3155

3256
// Default voice ID (Kai's voice)
@@ -174,14 +198,18 @@ async function sendNotification(
174198
const safeTitle = sanitizeForShell(title);
175199
const safeMessage = sanitizeForShell(message);
176200

177-
// Generate and play voice using ElevenLabs
178-
if (voiceEnabled && ELEVENLABS_API_KEY) {
201+
// Generate and play voice
202+
if (voiceEnabled) {
179203
try {
180204
const voice = voiceId || DEFAULT_VOICE_ID;
181-
console.log(`πŸŽ™οΈ Generating speech with ElevenLabs (voice: ${voice})`);
182-
183-
const audioBuffer = await generateSpeech(safeMessage, voice);
184-
await playAudio(audioBuffer);
205+
if (provider) {
206+
console.log(`πŸŽ™οΈ Generating speech with ${provider.name} (voice: ${voice})`);
207+
await provider.speak(safeMessage, voice);
208+
} else if (ELEVENLABS_API_KEY) {
209+
console.log(`πŸŽ™οΈ Generating speech with ElevenLabs (voice: ${voice})`);
210+
const audioBuffer = await generateSpeech(safeMessage, voice);
211+
await playAudio(audioBuffer);
212+
}
185213
} catch (error) {
186214
console.error("Failed to generate/play speech:", error);
187215
}
@@ -315,9 +343,7 @@ const server = serve({
315343
JSON.stringify({
316344
status: "healthy",
317345
port: PORT,
318-
voice_system: "ElevenLabs",
319-
model: DEFAULT_MODEL,
320-
default_voice_id: DEFAULT_VOICE_ID,
346+
provider: provider?.name || "elevenlabs",
321347
api_key_configured: !!ELEVENLABS_API_KEY
322348
}),
323349
{
@@ -335,7 +361,5 @@ const server = serve({
335361
});
336362

337363
console.log(`πŸš€ PAIVoice Server running on port ${PORT}`);
338-
console.log(`πŸŽ™οΈ Using ElevenLabs TTS (model: ${DEFAULT_MODEL}, voice: ${DEFAULT_VOICE_ID})`);
364+
console.log(`πŸŽ™οΈ TTS Provider: ${provider?.name || 'elevenlabs'}`);
339365
console.log(`πŸ“‘ POST to http://localhost:${PORT}/notify`);
340-
console.log(`πŸ”’ Security: CORS restricted to localhost, rate limiting enabled`);
341-
console.log(`πŸ”‘ API Key: ${ELEVENLABS_API_KEY ? 'βœ… Configured' : '❌ Missing'}`);

0 commit comments

Comments
Β (0)