diff --git a/Releases/v4.0.3/.claude/PAI-Install/engine/actions.ts b/Releases/v4.0.3/.claude/PAI-Install/engine/actions.ts index 73ab5e212..5301324f7 100644 --- a/Releases/v4.0.3/.claude/PAI-Install/engine/actions.ts +++ b/Releases/v4.0.3/.claude/PAI-Install/engine/actions.ts @@ -10,7 +10,7 @@ import { homedir } from "os"; import { join, basename } from "path"; import type { InstallState, EngineEventHandler, DetectionResult } from "./types"; import { PAI_VERSION, ALGORITHM_VERSION } from "./types"; -import { detectSystem, validateElevenLabsKey } from "./detect"; +import { detectSystem, validateElevenLabsKey, validateGoogleCloudKey } from "./detect"; import { generateSettingsJson } from "./config-gen"; /** @@ -866,30 +866,87 @@ export async function runVoiceSetup( ): Promise { await emit({ event: "step_start", step: "voice" }); - // ── Collect ElevenLabs key if not already found ── - if (!state.collected.elevenLabsKey) { - await emit({ event: "progress", step: "voice", percent: 5, detail: "Searching for existing ElevenLabs key..." }); - let elevenLabsKey = findExistingEnvKey("ELEVENLABS_API_KEY"); + const paiDir = state.detection?.paiDir || join(homedir(), ".claude"); + const settingsPath = join(paiDir, "settings.json"); + + // ── Choose TTS provider ── + await emit({ event: "progress", step: "voice", percent: 5, detail: "Choose your TTS provider..." }); + + const providerChoice = await getChoice("tts-provider", "Voice requires a TTS provider. Choose one:", [ + { label: "ElevenLabs", value: "elevenlabs", description: "High-quality AI voices (10K free chars/month, paid plans available)" }, + { label: "Google Cloud TTS", value: "google-cloud", description: "Neural2/WaveNet voices (4M free chars/month Standard, 1M WaveNet)" }, + { label: "Skip voice for now", value: "skip", description: "You can configure voice later in settings.json" }, + ]); + + if (providerChoice === "skip") { + state.collected.ttsProvider = undefined; + await emit({ event: "message", content: "Voice skipped. You can add a TTS provider later in ~/.claude/settings.json" }); + await emit({ event: "step_complete", step: "voice" }); + return; + } + + state.collected.ttsProvider = providerChoice as "elevenlabs" | "google-cloud"; - if (elevenLabsKey) { - await emit({ event: "message", content: "Found existing ElevenLabs API key. Validating..." }); - const result = await validateElevenLabsKey(elevenLabsKey); + // ── Collect API key for chosen provider ── + let hasApiKey = false; + + if (providerChoice === "google-cloud") { + // ── Google Cloud TTS key collection ── + await emit({ event: "progress", step: "voice", percent: 10, detail: "Searching for existing Google Cloud key..." }); + let googleKey = findExistingEnvKey("GOOGLE_CLOUD_API_KEY") || findExistingEnvKey("GOOGLE_API_KEY"); + + if (googleKey) { + await emit({ event: "message", content: "Found existing Google Cloud API key. Validating..." }); + const result = await validateGoogleCloudKey(googleKey); if (result.valid) { - state.collected.elevenLabsKey = elevenLabsKey; - await emit({ event: "message", content: "Existing ElevenLabs API key is valid." }); + state.collected.googleCloudKey = googleKey; + await emit({ event: "message", content: "Existing Google Cloud API key is valid." }); } else { await emit({ event: "message", content: `Existing key invalid: ${result.error}.` }); - elevenLabsKey = ""; + googleKey = ""; } } - if (!elevenLabsKey) { - const wantsVoice = await getChoice("voice-enable", "Voice requires an ElevenLabs API key. Get one free at elevenlabs.io", [ - { label: "I have a key", value: "yes" }, - { label: "Skip voice for now", value: "skip" }, - ]); + if (!googleKey) { + const key = await getInput( + "google-cloud-key", + "Enter your Google Cloud API key:\nEnable the Text-to-Speech API at console.cloud.google.com", + "key", + "AIza..." + ); - if (wantsVoice === "yes") { + if (key.trim()) { + await emit({ event: "progress", step: "voice", percent: 15, detail: "Validating Google Cloud key..." }); + const result = await validateGoogleCloudKey(key.trim()); + if (result.valid) { + state.collected.googleCloudKey = key.trim(); + await emit({ event: "message", content: "Google Cloud API key verified." }); + } else { + await emit({ event: "message", content: `Key validation failed: ${result.error}. Voice may not work until a valid key is provided.` }); + } + } + } + + hasApiKey = !!state.collected.googleCloudKey; + } else { + // ── ElevenLabs key collection (existing flow) ── + if (!state.collected.elevenLabsKey) { + await emit({ event: "progress", step: "voice", percent: 10, detail: "Searching for existing ElevenLabs key..." }); + let elevenLabsKey = findExistingEnvKey("ELEVENLABS_API_KEY"); + + if (elevenLabsKey) { + await emit({ event: "message", content: "Found existing ElevenLabs API key. Validating..." }); + const result = await validateElevenLabsKey(elevenLabsKey); + if (result.valid) { + state.collected.elevenLabsKey = elevenLabsKey; + await emit({ event: "message", content: "Existing ElevenLabs API key is valid." }); + } else { + await emit({ event: "message", content: `Existing key invalid: ${result.error}.` }); + elevenLabsKey = ""; + } + } + + if (!elevenLabsKey) { const key = await getInput( "elevenlabs-key", "Enter your ElevenLabs API key:", @@ -904,109 +961,122 @@ export async function runVoiceSetup( state.collected.elevenLabsKey = key.trim(); await emit({ event: "message", content: "ElevenLabs API key verified." }); } else { - await emit({ event: "message", content: `Key validation failed: ${result.error}. Skipping voice setup.` }); + await emit({ event: "message", content: `Key validation failed: ${result.error}. Voice may not work until a valid key is provided.` }); } } } } + + hasApiKey = !!state.collected.elevenLabsKey; } - const hasElevenLabsKey = !!state.collected.elevenLabsKey; - if (!hasElevenLabsKey) { - await emit({ event: "message", content: "No ElevenLabs key — voice server will use macOS text-to-speech as fallback. You can add a key later in ~/.config/PAI/.env" }); + if (!hasApiKey) { + await emit({ event: "message", content: `No ${providerChoice === "google-cloud" ? "Google Cloud" : "ElevenLabs"} key — voice will need manual configuration. Add the key to ~/.config/PAI/.env` }); } - // ── Start voice server (works with or without ElevenLabs key) ── - const paiDir = state.detection?.paiDir || join(homedir(), ".claude"); + // ── Start voice server ── await emit({ event: "progress", step: "voice", percent: 25, detail: "Starting voice server..." }); const voiceServerReady = await startVoiceServer(paiDir, emit); - // ── Digital Assistant Voice selection ── - await emit({ event: "progress", step: "voice", percent: 40, detail: "Checking for existing voice configuration..." }); + // ── Voice selection (ElevenLabs only — Google Cloud uses Neural2-D default) ── + let selectedVoiceId = ""; - const voiceIds: Record = { - male: "pNInz6obpgDQGcFmaJgB", - female: "21m00Tcm4TlvDq8ikWAM", - }; + if (providerChoice === "google-cloud") { + // Google Cloud: use default Neural2-D voice, configurable later in settings.json + selectedVoiceId = ""; // Not applicable for Google Cloud + await emit({ event: "message", content: "Google Cloud TTS will use Neural2-D voice (en-US). Configurable in settings.json → daidentity.googleCloudVoice" }); + } else { + // ElevenLabs: existing voice selection flow + await emit({ event: "progress", step: "voice", percent: 40, detail: "Checking for existing voice configuration..." }); - let selectedVoiceId: string; + const voiceIds: Record = { + male: "pNInz6obpgDQGcFmaJgB", + female: "21m00Tcm4TlvDq8ikWAM", + }; - // Check for existing voice config from previous installations - const existingVoice = findExistingVoiceConfig(); + // Check for existing voice config from previous installations + const existingVoice = findExistingVoiceConfig(); - if (existingVoice) { - const sourceLabel = existingVoice.aiName - ? `${existingVoice.aiName}'s voice (${existingVoice.voiceId.substring(0, 8)}...)` - : `Voice ID ${existingVoice.voiceId.substring(0, 8)}...`; - await emit({ event: "message", content: `Found existing voice configuration from ~/${existingVoice.source}` }); + if (existingVoice) { + const sourceLabel = existingVoice.aiName + ? `${existingVoice.aiName}'s voice (${existingVoice.voiceId.substring(0, 8)}...)` + : `Voice ID ${existingVoice.voiceId.substring(0, 8)}...`; + await emit({ event: "message", content: `Found existing voice configuration from ~/${existingVoice.source}` }); - const useExisting = await getChoice("voice-existing", `Your DA was using: ${sourceLabel}. Use the same voice?`, [ - { label: "Yes, keep this voice", value: "keep", description: `Voice ID: ${existingVoice.voiceId}` }, - { label: "No, pick a new voice", value: "new", description: "Choose from presets or enter a custom ID" }, - ]); + const useExisting = await getChoice("voice-existing", `Your DA was using: ${sourceLabel}. Use the same voice?`, [ + { label: "Yes, keep this voice", value: "keep", description: `Voice ID: ${existingVoice.voiceId}` }, + { label: "No, pick a new voice", value: "new", description: "Choose from presets or enter a custom ID" }, + ]); - if (useExisting === "keep") { - selectedVoiceId = existingVoice.voiceId; - state.collected.voiceType = "custom"; - state.collected.customVoiceId = selectedVoiceId; - } else { - // Fall through to voice selection below - selectedVoiceId = ""; + if (useExisting === "keep") { + selectedVoiceId = existingVoice.voiceId; + state.collected.voiceType = "custom"; + state.collected.customVoiceId = selectedVoiceId; + } } - } else { - selectedVoiceId = ""; - } - // Voice selection (if not using existing) - if (!selectedVoiceId) { - await emit({ event: "progress", step: "voice", percent: 45, detail: "Choose your Digital Assistant's voice..." }); - - const voiceType = await getChoice("voice-type", "Digital Assistant Voice — Choose a voice for your AI assistant:", [ - { label: "Female (Rachel)", value: "female", description: "Warm, articulate female voice" }, - { label: "Male (Adam)", value: "male", description: "Clear, confident male voice" }, - { label: "Custom Voice ID", value: "custom", description: "Enter your own ElevenLabs voice ID" }, - ]); - - if (voiceType === "custom") { - const customId = await getInput( - "custom-voice-id", - "Enter your ElevenLabs Voice ID:\nFind it at: elevenlabs.io/app/voice-library → Your voice → Voice ID", - "text", - "e.g., s3TPKV1kjDlVtZbl4Ksh" - ); - selectedVoiceId = customId.trim() || voiceIds.female; - state.collected.voiceType = "custom"; - state.collected.customVoiceId = selectedVoiceId; - } else { - selectedVoiceId = voiceIds[voiceType] || voiceIds.female; - state.collected.voiceType = voiceType as any; + // Voice selection (if not using existing) + if (!selectedVoiceId) { + await emit({ event: "progress", step: "voice", percent: 45, detail: "Choose your Digital Assistant's voice..." }); + + const voiceType = await getChoice("voice-type", "Digital Assistant Voice — Choose a voice for your AI assistant:", [ + { label: "Female (Rachel)", value: "female", description: "Warm, articulate female voice" }, + { label: "Male (Adam)", value: "male", description: "Clear, confident male voice" }, + { label: "Custom Voice ID", value: "custom", description: "Enter your own ElevenLabs voice ID" }, + ]); + + if (voiceType === "custom") { + const customId = await getInput( + "custom-voice-id", + "Enter your ElevenLabs Voice ID:\nFind it at: elevenlabs.io/app/voice-library → Your voice → Voice ID", + "text", + "e.g., s3TPKV1kjDlVtZbl4Ksh" + ); + selectedVoiceId = customId.trim() || voiceIds.female; + state.collected.voiceType = "custom"; + state.collected.customVoiceId = selectedVoiceId; + } else { + selectedVoiceId = voiceIds[voiceType] || voiceIds.female; + state.collected.voiceType = voiceType as any; + } } } - // ── Update settings.json with voice ID ── + // ── Update settings.json with voice + provider config ── await emit({ event: "progress", step: "voice", percent: 60, detail: "Saving voice configuration..." }); - const settingsPath = join(paiDir, "settings.json"); if (existsSync(settingsPath)) { try { const settings = JSON.parse(readFileSync(settingsPath, "utf-8")); if (settings.daidentity) { - settings.daidentity.voiceId = selectedVoiceId; - settings.daidentity.voices = settings.daidentity.voices || {}; - settings.daidentity.voices.main = { - voiceId: selectedVoiceId, - stability: 0.35, - similarityBoost: 0.80, - style: 0.90, - speed: 1.1, - }; - settings.daidentity.voices.algorithm = { - voiceId: selectedVoiceId, - stability: 0.35, - similarityBoost: 0.80, - style: 0.90, - speed: 1.1, - }; + settings.daidentity.ttsProvider = providerChoice; + + if (providerChoice === "google-cloud") { + settings.daidentity.googleCloudVoice = { + languageCode: "en-US", + voiceName: "en-US-Neural2-D", + voiceType: "NEURAL2", + speakingRate: 1.0, + pitch: 0.0, + }; + } else { + settings.daidentity.voiceId = selectedVoiceId; + settings.daidentity.voices = settings.daidentity.voices || {}; + settings.daidentity.voices.main = { + voiceId: selectedVoiceId, + stability: 0.35, + similarityBoost: 0.80, + style: 0.90, + speed: 1.1, + }; + settings.daidentity.voices.algorithm = { + voiceId: selectedVoiceId, + stability: 0.35, + similarityBoost: 0.80, + style: 0.90, + speed: 1.1, + }; + } } writeFileSync(settingsPath, JSON.stringify(settings, null, 2)); await emit({ event: "message", content: "Voice settings saved to settings.json." }); @@ -1015,17 +1085,26 @@ export async function runVoiceSetup( } } - // ── Save ElevenLabs key to .env (if provided) ── - if (hasElevenLabsKey) { + // ── Save API key to .env ── + if (hasApiKey) { const configDir = state.detection?.configDir || join(homedir(), ".config", "PAI"); const envPath = join(configDir, ".env"); if (!existsSync(configDir)) mkdirSync(configDir, { recursive: true }); let envContent = existsSync(envPath) ? readFileSync(envPath, "utf-8") : ""; - if (envContent.includes("ELEVENLABS_API_KEY=")) { - envContent = envContent.replace(/ELEVENLABS_API_KEY=.*/, `ELEVENLABS_API_KEY=${state.collected.elevenLabsKey}`); - } else { - envContent = envContent.trim() + `\nELEVENLABS_API_KEY=${state.collected.elevenLabsKey}\n`; + + if (providerChoice === "google-cloud" && state.collected.googleCloudKey) { + if (envContent.includes("GOOGLE_CLOUD_API_KEY=")) { + envContent = envContent.replace(/GOOGLE_CLOUD_API_KEY=.*/, `GOOGLE_CLOUD_API_KEY=${state.collected.googleCloudKey}`); + } else { + envContent = envContent.trim() + `\nGOOGLE_CLOUD_API_KEY=${state.collected.googleCloudKey}\n`; + } + } else if (state.collected.elevenLabsKey) { + if (envContent.includes("ELEVENLABS_API_KEY=")) { + envContent = envContent.replace(/ELEVENLABS_API_KEY=.*/, `ELEVENLABS_API_KEY=${state.collected.elevenLabsKey}`); + } else { + envContent = envContent.trim() + `\nELEVENLABS_API_KEY=${state.collected.elevenLabsKey}\n`; + } } writeFileSync(envPath, envContent.trim() + "\n", { mode: 0o600 }); @@ -1053,32 +1132,38 @@ export async function runVoiceSetup( try { const aiName = state.collected.aiName || "PAI"; const userName = state.collected.principalName || "there"; + + // Build test payload based on provider + const testPayload: Record = { + message: `Hello ${userName}, this is ${aiName}. My voice system is online and ready to assist you.`, + }; + if (providerChoice === "elevenlabs" && selectedVoiceId) { + testPayload.voice_id = selectedVoiceId; + testPayload.voice_settings = { stability: 0.35, similarity_boost: 0.80, style: 0.90, speed: 1.1, use_speaker_boost: true }; + } + const testRes = await fetch("http://localhost:8888/notify", { method: "POST", headers: { "Content-Type": "application/json" }, - body: JSON.stringify({ - message: `Hello ${userName}, this is ${aiName}. My voice system is online and ready to assist you.`, - voice_id: selectedVoiceId, - voice_settings: { stability: 0.35, similarity_boost: 0.80, style: 0.90, speed: 1.1, use_speaker_boost: true }, - }), - signal: AbortSignal.timeout(10000), + body: JSON.stringify(testPayload), + signal: AbortSignal.timeout(15000), }); if (testRes.ok) { await emit({ event: "message", content: `Voice test sent — listen for ${aiName} speaking...`, speak: false }); // Ask user to confirm they heard it and like it - const confirm = await getChoice("voice-confirm", "Did you hear the voice? Does it sound good?", [ + const confirmChoices = [ { label: "Sounds great!", value: "yes" }, - { label: "Pick a different voice", value: "change" }, + ...(providerChoice === "elevenlabs" ? [{ label: "Pick a different voice", value: "change" }] : []), { label: "Skip voice for now", value: "skip" }, - ]); + ]; + const confirm = await getChoice("voice-confirm", "Did you hear the voice? Does it sound good?", confirmChoices); - if (confirm === "yes") { - voiceConfirmed = true; - } else if (confirm === "skip") { + if (confirm === "yes" || confirm === "skip") { voiceConfirmed = true; } else { - // Let them pick again + // ElevenLabs: let them pick again + const voiceIds: Record = { male: "pNInz6obpgDQGcFmaJgB", female: "21m00Tcm4TlvDq8ikWAM" }; const newVoice = await getChoice("voice-type-retry", "Choose a different voice:", [ { label: "Female (Rachel)", value: "female", description: "Warm, articulate female voice" }, { label: "Male (Adam)", value: "male", description: "Clear, confident male voice" }, @@ -1112,9 +1197,11 @@ export async function runVoiceSetup( } } - const voiceLabel = state.collected.voiceType === "custom" - ? `Custom voice (${selectedVoiceId.substring(0, 8)}...)` - : state.collected.voiceType || "default"; - await emit({ event: "message", content: `Digital Assistant voice configured: ${voiceLabel}` }); + const voiceLabel = providerChoice === "google-cloud" + ? "Google Cloud TTS (Neural2-D)" + : state.collected.voiceType === "custom" + ? `Custom voice (${selectedVoiceId.substring(0, 8)}...)` + : state.collected.voiceType || "default"; + await emit({ event: "message", content: `Digital Assistant voice configured: ${voiceLabel} (${providerChoice})` }); await emit({ event: "step_complete", step: "voice" }); } diff --git a/Releases/v4.0.3/.claude/PAI-Install/engine/config-gen.ts b/Releases/v4.0.3/.claude/PAI-Install/engine/config-gen.ts index 51789a6a3..cdccdeb68 100644 --- a/Releases/v4.0.3/.claude/PAI-Install/engine/config-gen.ts +++ b/Releases/v4.0.3/.claude/PAI-Install/engine/config-gen.ts @@ -35,6 +35,10 @@ export function generateSettingsJson(config: PAIConfig): Record { fullName: `${config.aiName} — Personal AI`, displayName: config.aiName.toUpperCase(), color: "#3B82F6", + ...(config.ttsProvider ? { ttsProvider: config.ttsProvider } : {}), + ...(config.ttsProvider === "google-cloud" && config.googleCloudVoice + ? { googleCloudVoice: config.googleCloudVoice } + : {}), voices: { main: { voiceId, diff --git a/Releases/v4.0.3/.claude/PAI-Install/engine/detect.ts b/Releases/v4.0.3/.claude/PAI-Install/engine/detect.ts index 3dc25f3bf..a5cb29a10 100644 --- a/Releases/v4.0.3/.claude/PAI-Install/engine/detect.ts +++ b/Releases/v4.0.3/.claude/PAI-Install/engine/detect.ts @@ -100,7 +100,8 @@ function detectExisting( try { const envContent = readFileSync(envPath, "utf-8"); result.elevenLabsKeyFound = envContent.includes("ELEVENLABS_API_KEY="); - result.hasApiKeys = result.elevenLabsKeyFound; + const googleKeyFound = envContent.includes("GOOGLE_CLOUD_API_KEY=") || envContent.includes("GOOGLE_API_KEY="); + result.hasApiKeys = result.elevenLabsKeyFound || googleKeyFound; } catch { // Permission denied or other error } @@ -166,3 +167,21 @@ export async function validateElevenLabsKey(key: string): Promise<{ valid: boole return { valid: false, error: e.message || "Network error" }; } } + +/** + * Validate a Google Cloud API key by listing available TTS voices. + */ +export async function validateGoogleCloudKey(key: string): Promise<{ valid: boolean; error?: string }> { + try { + const res = await fetch( + `https://texttospeech.googleapis.com/v1/voices?key=${key}`, + { signal: AbortSignal.timeout(10000) } + ); + + if (res.ok) return { valid: true }; + const body = await res.text().catch(() => ""); + return { valid: false, error: `HTTP ${res.status}${body ? `: ${body.slice(0, 100)}` : ""}` }; + } catch (e: any) { + return { valid: false, error: e.message || "Network error" }; + } +} diff --git a/Releases/v4.0.3/.claude/PAI-Install/engine/steps.ts b/Releases/v4.0.3/.claude/PAI-Install/engine/steps.ts index 7d5e14d16..10c6bd4b6 100644 --- a/Releases/v4.0.3/.claude/PAI-Install/engine/steps.ts +++ b/Releases/v4.0.3/.claude/PAI-Install/engine/steps.ts @@ -25,7 +25,7 @@ export const STEPS: StepDefinition[] = [ { id: "api-keys", name: "API Keys", - description: "Find or collect ElevenLabs API key for voice features", + description: "Find or collect API keys for voice features", number: 3, required: true, dependsOn: ["prerequisites"], @@ -57,7 +57,7 @@ export const STEPS: StepDefinition[] = [ { id: "voice", name: "Digital Assistant Voice", - description: "Configure ElevenLabs key, select voice, start voice server, and test", + description: "Choose TTS provider, configure API key, select voice, and test", number: 7, required: true, dependsOn: ["configuration"], diff --git a/Releases/v4.0.3/.claude/PAI-Install/engine/types.ts b/Releases/v4.0.3/.claude/PAI-Install/engine/types.ts index 0e93734d8..8ab0bb569 100644 --- a/Releases/v4.0.3/.claude/PAI-Install/engine/types.ts +++ b/Releases/v4.0.3/.claude/PAI-Install/engine/types.ts @@ -79,6 +79,8 @@ export interface InstallState { // Collected data collected: { elevenLabsKey?: string; + googleCloudKey?: string; + ttsProvider?: "elevenlabs" | "google-cloud"; principalName?: string; timezone?: string; aiName?: string; @@ -112,6 +114,14 @@ export interface PAIConfig { temperatureUnit?: "fahrenheit" | "celsius"; voiceType?: string; voiceId?: string; + ttsProvider?: "elevenlabs" | "google-cloud"; + googleCloudVoice?: { + languageCode: string; + voiceName: string; + voiceType: string; + speakingRate: number; + pitch: number; + }; paiDir: string; configDir: string; } @@ -127,7 +137,7 @@ export type ServerMessage = | { type: "input_request"; id: string; prompt: string; inputType: "text" | "password" | "key"; placeholder?: string } | { type: "choice_request"; id: string; prompt: string; choices: { label: string; value: string; description?: string }[] } | { type: "progress"; step: StepId; percent: number; detail: string } - | { type: "voice_enabled"; enabled: boolean; mode: "elevenlabs" | "browser" | "none" } + | { type: "voice_enabled"; enabled: boolean; mode: "elevenlabs" | "google-cloud" | "browser" | "none" } | { type: "install_complete"; success: boolean; summary: InstallSummary } | { type: "validation_result"; checks: ValidationCheck[] } | { type: "error"; message: string; step?: StepId }; diff --git a/Releases/v4.0.3/.claude/VoiceServer/install.sh b/Releases/v4.0.3/.claude/VoiceServer/install.sh index 15b6c83e0..dff7b2fd4 100755 --- a/Releases/v4.0.3/.claude/VoiceServer/install.sh +++ b/Releases/v4.0.3/.claude/VoiceServer/install.sh @@ -1,7 +1,7 @@ #!/bin/bash # Voice Server Installation Script -# This script installs the voice server as a macOS service +# Installs the voice server as a macOS LaunchAgent or Linux systemd user service set -e @@ -14,13 +14,27 @@ NC='\033[0m' # No Color # Configuration SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" -SERVICE_NAME="com.pai.voice-server" -PLIST_PATH="$HOME/Library/LaunchAgents/${SERVICE_NAME}.plist" -LOG_PATH="$HOME/Library/Logs/pai-voice-server.log" +OS_TYPE="$(uname -s)" ENV_FILE="$HOME/.env" +# Platform-specific paths +if [ "$OS_TYPE" = "Darwin" ]; then + SERVICE_NAME="com.pai.voice-server" + PLIST_PATH="$HOME/Library/LaunchAgents/${SERVICE_NAME}.plist" + LOG_PATH="$HOME/Library/Logs/pai-voice-server.log" +elif [ "$OS_TYPE" = "Linux" ]; then + SERVICE_NAME="pai-voice-server" + SYSTEMD_DIR="$HOME/.config/systemd/user" + SERVICE_PATH="${SYSTEMD_DIR}/${SERVICE_NAME}.service" + LOG_PATH="$HOME/.config/pai/voice-server.log" +else + echo -e "${RED}X Unsupported platform: $OS_TYPE${NC}" + echo " PAI Voice Server supports macOS and Linux." + exit 1 +fi + echo -e "${BLUE}=====================================================${NC}" -echo -e "${BLUE} PAI Voice Server Installation${NC}" +echo -e "${BLUE} PAI Voice Server Installation ($OS_TYPE)${NC}" echo -e "${BLUE}=====================================================${NC}" echo @@ -34,52 +48,97 @@ if ! command -v bun &> /dev/null; then fi echo -e "${GREEN}OK Bun is installed${NC}" -# Check for existing installation -if launchctl list | grep -q "$SERVICE_NAME" 2>/dev/null; then - echo -e "${YELLOW}! Voice server is already installed${NC}" - read -p "Do you want to reinstall? (y/n): " -n 1 -r - echo - if [[ $REPLY =~ ^[Yy]$ ]]; then - echo -e "${YELLOW}> Stopping existing service...${NC}" - launchctl unload "$PLIST_PATH" 2>/dev/null || true - echo -e "${GREEN}OK Existing service stopped${NC}" +# Linux: check for audio player +if [ "$OS_TYPE" = "Linux" ]; then + if command -v mpv &> /dev/null; then + echo -e "${GREEN}OK Audio player: mpv${NC}" + elif command -v ffplay &> /dev/null; then + echo -e "${GREEN}OK Audio player: ffplay${NC}" + elif command -v aplay &> /dev/null; then + echo -e "${GREEN}OK Audio player: aplay${NC}" else - echo "Installation cancelled" - exit 0 + echo -e "${YELLOW}! No audio player found (mpv or ffplay recommended)${NC}" + echo " Install one: sudo apt install mpv OR sudo apt install ffmpeg" + fi + if command -v notify-send &> /dev/null; then + echo -e "${GREEN}OK Desktop notifications: notify-send${NC}" + else + echo -e "${YELLOW}! notify-send not found — desktop notifications will be skipped${NC}" + echo " Install: sudo apt install libnotify-bin" fi fi -# Check for ElevenLabs configuration -echo -e "${YELLOW}> Checking ElevenLabs configuration...${NC}" -if [ -f "$ENV_FILE" ] && grep -q "ELEVENLABS_API_KEY=" "$ENV_FILE"; then - API_KEY=$(grep "ELEVENLABS_API_KEY=" "$ENV_FILE" | cut -d'=' -f2) - if [ "$API_KEY" != "your_api_key_here" ] && [ -n "$API_KEY" ]; then - echo -e "${GREEN}OK ElevenLabs API key configured${NC}" - ELEVENLABS_CONFIGURED=true - else - echo -e "${YELLOW}! ElevenLabs API key not configured${NC}" - echo " Voice server will use macOS 'say' command as fallback" - ELEVENLABS_CONFIGURED=false +# Check for existing installation +if [ "$OS_TYPE" = "Darwin" ]; then + if launchctl list | grep -q "$SERVICE_NAME" 2>/dev/null; then + echo -e "${YELLOW}! Voice server is already installed${NC}" + read -p "Do you want to reinstall? (y/n): " -n 1 -r + echo + if [[ $REPLY =~ ^[Yy]$ ]]; then + echo -e "${YELLOW}> Stopping existing service...${NC}" + launchctl unload "$PLIST_PATH" 2>/dev/null || true + echo -e "${GREEN}OK Existing service stopped${NC}" + else + echo "Installation cancelled" + exit 0 + fi + fi +elif [ "$OS_TYPE" = "Linux" ]; then + if systemctl --user is-active "$SERVICE_NAME" &>/dev/null; then + echo -e "${YELLOW}! Voice server is already installed${NC}" + read -p "Do you want to reinstall? (y/n): " -n 1 -r + echo + if [[ $REPLY =~ ^[Yy]$ ]]; then + echo -e "${YELLOW}> Stopping existing service...${NC}" + systemctl --user stop "$SERVICE_NAME" 2>/dev/null || true + systemctl --user disable "$SERVICE_NAME" 2>/dev/null || true + echo -e "${GREEN}OK Existing service stopped${NC}" + else + echo "Installation cancelled" + exit 0 + fi fi -else - echo -e "${YELLOW}! No ElevenLabs configuration found${NC}" - echo " Voice server will use macOS 'say' command as fallback" - ELEVENLABS_CONFIGURED=false fi -if [ "$ELEVENLABS_CONFIGURED" = false ]; then +# Check for TTS API configuration +echo -e "${YELLOW}> Checking TTS configuration...${NC}" +TTS_CONFIGURED=false +TTS_PROVIDER="none" + +if [ -f "$ENV_FILE" ]; then + if grep -q "ELEVENLABS_API_KEY=" "$ENV_FILE"; then + API_KEY=$(grep "ELEVENLABS_API_KEY=" "$ENV_FILE" | cut -d'=' -f2) + if [ "$API_KEY" != "your_api_key_here" ] && [ -n "$API_KEY" ]; then + echo -e "${GREEN}OK ElevenLabs API key configured${NC}" + TTS_CONFIGURED=true + TTS_PROVIDER="ElevenLabs" + fi + fi + if grep -q "GOOGLE_CLOUD_API_KEY=\|GOOGLE_API_KEY=" "$ENV_FILE"; then + GC_KEY=$(grep -m1 "GOOGLE_CLOUD_API_KEY=\|GOOGLE_API_KEY=" "$ENV_FILE" | cut -d'=' -f2) + if [ -n "$GC_KEY" ]; then + echo -e "${GREEN}OK Google Cloud API key configured${NC}" + TTS_CONFIGURED=true + [ "$TTS_PROVIDER" = "none" ] && TTS_PROVIDER="Google Cloud" + fi + fi +fi + +if [ "$TTS_CONFIGURED" = false ]; then + echo -e "${YELLOW}! No TTS API key found${NC}" echo - echo "To enable AI voices, add your ElevenLabs API key to ~/.env:" - echo " echo 'ELEVENLABS_API_KEY=your_api_key_here' >> ~/.env" - echo " Get a free key at: https://elevenlabs.io" + echo "To enable AI voices, add an API key to ~/.env:" + echo " ElevenLabs: echo 'ELEVENLABS_API_KEY=your_key' >> ~/.env" + echo " Google Cloud: echo 'GOOGLE_CLOUD_API_KEY=your_key' >> ~/.env" echo fi -# Create LaunchAgent plist -echo -e "${YELLOW}> Creating LaunchAgent configuration...${NC}" -mkdir -p "$HOME/Library/LaunchAgents" +# Create service configuration +if [ "$OS_TYPE" = "Darwin" ]; then + echo -e "${YELLOW}> Creating LaunchAgent configuration...${NC}" + mkdir -p "$HOME/Library/LaunchAgents" -cat > "$PLIST_PATH" << EOF + cat > "$PLIST_PATH" << EOF @@ -123,15 +182,57 @@ cat > "$PLIST_PATH" << EOF EOF -echo -e "${GREEN}OK LaunchAgent configuration created${NC}" + echo -e "${GREEN}OK LaunchAgent configuration created${NC}" -# Load the LaunchAgent -echo -e "${YELLOW}> Starting voice server service...${NC}" -launchctl load "$PLIST_PATH" 2>/dev/null || { - echo -e "${RED}X Failed to load LaunchAgent${NC}" - echo " Try manually: launchctl load $PLIST_PATH" - exit 1 -} + # Load the LaunchAgent + echo -e "${YELLOW}> Starting voice server service...${NC}" + launchctl load "$PLIST_PATH" 2>/dev/null || { + echo -e "${RED}X Failed to load LaunchAgent${NC}" + echo " Try manually: launchctl load $PLIST_PATH" + exit 1 + } + +elif [ "$OS_TYPE" = "Linux" ]; then + echo -e "${YELLOW}> Creating systemd user service...${NC}" + mkdir -p "$SYSTEMD_DIR" + mkdir -p "$(dirname "$LOG_PATH")" + + # Resolve bun path for systemd + BUN_PATH="$(which bun)" + + cat > "$SERVICE_PATH" << EOF +[Unit] +Description=PAI Voice Server +After=network.target + +[Service] +Type=simple +ExecStart=${BUN_PATH} run ${SCRIPT_DIR}/server.ts +WorkingDirectory=${SCRIPT_DIR} +Restart=on-failure +RestartSec=5 +Environment=HOME=${HOME} +Environment=PATH=/usr/local/bin:/usr/bin:/bin:${HOME}/.bun/bin +StandardOutput=append:${LOG_PATH} +StandardError=append:${LOG_PATH} + +[Install] +WantedBy=default.target +EOF + + echo -e "${GREEN}OK systemd service created${NC}" + + # Enable and start the service + echo -e "${YELLOW}> Starting voice server service...${NC}" + systemctl --user daemon-reload + systemctl --user enable "$SERVICE_NAME" 2>/dev/null || true + systemctl --user start "$SERVICE_NAME" || { + echo -e "${RED}X Failed to start systemd service${NC}" + echo " Try manually: systemctl --user start $SERVICE_NAME" + echo " Check logs: journalctl --user -u $SERVICE_NAME" + exit 1 + } +fi # Wait for server to start sleep 2 @@ -161,15 +262,16 @@ echo -e "${GREEN} Installation Complete!${NC}" echo -e "${GREEN}=====================================================${NC}" echo echo -e "${BLUE}Service Information:${NC}" +echo " - Platform: $OS_TYPE" echo " - Service: $SERVICE_NAME" echo " - Status: Running" echo " - Port: 8888" echo " - Logs: $LOG_PATH" -if [ "$ELEVENLABS_CONFIGURED" = true ]; then - echo " - Voice: ElevenLabs AI" +if [ "$TTS_CONFIGURED" = true ]; then + echo " - Voice: $TTS_PROVIDER" else - echo " - Voice: macOS Say (fallback)" + echo " - Voice: Not configured (add API key to ~/.env)" fi echo @@ -180,6 +282,14 @@ echo " - Start: ./start.sh" echo " - Restart: ./restart.sh" echo " - Uninstall: ./uninstall.sh" +if [ "$OS_TYPE" = "Linux" ]; then + echo + echo -e "${BLUE}systemd Commands:${NC}" + echo " - Status: systemctl --user status $SERVICE_NAME" + echo " - Logs: journalctl --user -u $SERVICE_NAME -f" + echo " - Restart: systemctl --user restart $SERVICE_NAME" +fi + echo echo -e "${BLUE}Test the server:${NC}" echo " curl -X POST http://localhost:8888/notify \\" @@ -189,18 +299,20 @@ echo " -d '{\"message\": \"Hello from PAI\"}'" echo echo -e "${GREEN}The voice server will now start automatically when you log in.${NC}" -# Ask about menu bar indicator -echo -read -p "Would you like to install a menu bar indicator? (y/n): " -n 1 -r -echo -if [[ $REPLY =~ ^[Yy]$ ]]; then - echo -e "${YELLOW}> Installing menu bar indicator...${NC}" - if [ -f "$SCRIPT_DIR/menubar/install-menubar.sh" ]; then - chmod +x "$SCRIPT_DIR/menubar/install-menubar.sh" - "$SCRIPT_DIR/menubar/install-menubar.sh" - else - echo -e "${YELLOW}! Menu bar installer not found${NC}" - echo " You can install it manually later from:" - echo " $SCRIPT_DIR/menubar/install-menubar.sh" +# Ask about menu bar indicator (macOS only) +if [ "$OS_TYPE" = "Darwin" ]; then + echo + read -p "Would you like to install a menu bar indicator? (y/n): " -n 1 -r + echo + if [[ $REPLY =~ ^[Yy]$ ]]; then + echo -e "${YELLOW}> Installing menu bar indicator...${NC}" + if [ -f "$SCRIPT_DIR/menubar/install-menubar.sh" ]; then + chmod +x "$SCRIPT_DIR/menubar/install-menubar.sh" + "$SCRIPT_DIR/menubar/install-menubar.sh" + else + echo -e "${YELLOW}! Menu bar installer not found${NC}" + echo " You can install it manually later from:" + echo " $SCRIPT_DIR/menubar/install-menubar.sh" + fi fi fi diff --git a/Releases/v4.0.3/.claude/VoiceServer/server.ts b/Releases/v4.0.3/.claude/VoiceServer/server.ts index 9f5dec95c..3be02aefe 100644 --- a/Releases/v4.0.3/.claude/VoiceServer/server.ts +++ b/Releases/v4.0.3/.claude/VoiceServer/server.ts @@ -1,6 +1,13 @@ #!/usr/bin/env bun /** - * Voice Server - Personal AI Voice notification server using ElevenLabs TTS + * Voice Server - Personal AI Voice notification server with multi-provider TTS + * + * Supported TTS providers: + * - ElevenLabs (default) — high-quality AI voices, 10K free chars/month + * - Google Cloud TTS — WaveNet/Neural2/Standard voices, up to 4M free chars/month + * + * Provider selection: settings.json daidentity.ttsProvider ("elevenlabs" | "google-cloud") + * Falls back to ElevenLabs if not specified (backwards compatible). * * Architecture: Pure pass-through. All voice config comes from settings.json. * The server has zero hardcoded voice parameters. @@ -11,7 +18,7 @@ * 3. Neither → use settings.json daidentity.voices.main as default * * Pronunciation preprocessing: loads pronunciations.json and applies - * word-boundary replacements before sending text to ElevenLabs TTS. + * word-boundary replacements before sending text to TTS. */ import { serve } from "bun"; @@ -34,11 +41,9 @@ if (existsSync(envPath)) { const PORT = parseInt(process.env.PORT || "8888"); const ELEVENLABS_API_KEY = process.env.ELEVENLABS_API_KEY; +const GOOGLE_CLOUD_API_KEY = process.env.GOOGLE_CLOUD_API_KEY || process.env.GOOGLE_API_KEY; -if (!ELEVENLABS_API_KEY) { - console.error('⚠️ ELEVENLABS_API_KEY not found in ~/.env'); - console.error('Add: ELEVENLABS_API_KEY=your_key_here'); -} +// TTS provider is resolved after settings.json is loaded (see below) // ========================================================================== // Pronunciation System @@ -130,9 +135,31 @@ interface VoiceEntry { volume: number; } +// TTS provider type +type TtsProvider = 'elevenlabs' | 'google-cloud'; + +// Google Cloud TTS voice configuration +interface GoogleCloudVoiceConfig { + languageCode: string; // e.g. "en-US" + voiceName: string; // e.g. "en-US-Neural2-D" + voiceType: 'STANDARD' | 'WAVENET' | 'NEURAL2'; + speakingRate: number; // 0.25 to 4.0, default 1.0 + pitch: number; // -20.0 to 20.0, default 0.0 +} + +const FALLBACK_GOOGLE_VOICE: GoogleCloudVoiceConfig = { + languageCode: 'en-US', + voiceName: 'en-US-Neural2-D', + voiceType: 'NEURAL2', + speakingRate: 1.0, + pitch: 0.0, +}; + // Loaded config from settings.json interface LoadedVoiceConfig { defaultVoiceId: string; + ttsProvider: TtsProvider; + googleVoice: GoogleCloudVoiceConfig; voices: Record; // keyed by name ("main", "algorithm") voicesByVoiceId: Record; // keyed by voiceId for lookup desktopNotifications: boolean; // whether to show macOS notification banners @@ -155,7 +182,7 @@ function loadVoiceConfig(): LoadedVoiceConfig { try { if (!existsSync(settingsPath)) { console.warn('⚠️ settings.json not found — using fallback voice defaults'); - return { defaultVoiceId: '', voices: {}, voicesByVoiceId: {}, desktopNotifications: true }; + return { defaultVoiceId: '', ttsProvider: 'elevenlabs', googleVoice: FALLBACK_GOOGLE_VOICE, voices: {}, voicesByVoiceId: {}, desktopNotifications: true }; } const content = readFileSync(settingsPath, 'utf-8'); @@ -189,16 +216,30 @@ function loadVoiceConfig(): LoadedVoiceConfig { // Default voice ID from settings const defaultVoiceId = voices.main?.voiceId || daidentity.mainDAVoiceID || ''; + // TTS provider selection + const ttsProvider: TtsProvider = daidentity.ttsProvider === 'google-cloud' ? 'google-cloud' : 'elevenlabs'; + + // Google Cloud voice config + const gcVoice = daidentity.googleCloudVoice || {}; + const googleVoice: GoogleCloudVoiceConfig = { + languageCode: gcVoice.languageCode || FALLBACK_GOOGLE_VOICE.languageCode, + voiceName: gcVoice.voiceName || FALLBACK_GOOGLE_VOICE.voiceName, + voiceType: gcVoice.voiceType || FALLBACK_GOOGLE_VOICE.voiceType, + speakingRate: gcVoice.speakingRate ?? FALLBACK_GOOGLE_VOICE.speakingRate, + pitch: gcVoice.pitch ?? FALLBACK_GOOGLE_VOICE.pitch, + }; + const voiceNames = Object.keys(voices); console.log(`✅ Loaded ${voiceNames.length} voice config(s) from settings.json: ${voiceNames.join(', ')}`); + console.log(`🔊 TTS provider: ${ttsProvider}`); for (const [name, entry] of Object.entries(voices)) { console.log(` ${name}: ${entry.voiceName || entry.voiceId} (speed: ${entry.speed}, stability: ${entry.stability})`); } - return { defaultVoiceId, voices, voicesByVoiceId, desktopNotifications }; + return { defaultVoiceId, ttsProvider, googleVoice, voices, voicesByVoiceId, desktopNotifications }; } catch (error) { console.error('⚠️ Failed to load settings.json voice config:', error); - return { defaultVoiceId: '', voices: {}, voicesByVoiceId: {}, desktopNotifications: true }; + return { defaultVoiceId: '', ttsProvider: 'elevenlabs', googleVoice: FALLBACK_GOOGLE_VOICE, voices: {}, voicesByVoiceId: {}, desktopNotifications: true }; } } @@ -330,19 +371,13 @@ function validateInput(input: any): { valid: boolean; error?: string; sanitized? } // Generate speech using ElevenLabs API — pure pass-through of voice_settings -async function generateSpeech( +async function generateSpeechElevenLabs( text: string, voiceId: string, voiceSettings: ElevenLabsVoiceSettings ): Promise { if (!ELEVENLABS_API_KEY) { - throw new Error('ElevenLabs API key not configured'); - } - - // Apply pronunciation replacements before sending to TTS - const pronouncedText = applyPronunciations(text); - if (pronouncedText !== text) { - console.log(`📖 Pronunciation: "${text}" → "${pronouncedText}"`); + throw new Error('ElevenLabs API key not configured — add ELEVENLABS_API_KEY to ~/.env'); } const url = `https://api.elevenlabs.io/v1/text-to-speech/${voiceId}`; @@ -355,7 +390,7 @@ async function generateSpeech( 'xi-api-key': ELEVENLABS_API_KEY, }, body: JSON.stringify({ - text: pronouncedText, + text, model_id: 'eleven_turbo_v2_5', voice_settings: voiceSettings, }), @@ -369,14 +404,110 @@ async function generateSpeech( return await response.arrayBuffer(); } -// Play audio using afplay (macOS) +// Generate speech using Google Cloud Text-to-Speech REST API (no SDK dependency) +async function generateSpeechGoogleCloud( + text: string, + gcVoice: GoogleCloudVoiceConfig +): Promise { + if (!GOOGLE_CLOUD_API_KEY) { + throw new Error('Google Cloud API key not configured — add GOOGLE_CLOUD_API_KEY to ~/.env'); + } + + const url = `https://texttospeech.googleapis.com/v1/text:synthesize?key=${GOOGLE_CLOUD_API_KEY}`; + + const response = await fetch(url, { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ + input: { text }, + voice: { + languageCode: gcVoice.languageCode, + name: gcVoice.voiceName, + }, + audioConfig: { + audioEncoding: 'MP3', + speakingRate: gcVoice.speakingRate, + pitch: gcVoice.pitch, + }, + }), + }); + + if (!response.ok) { + const errorText = await response.text(); + throw new Error(`Google Cloud TTS API error: ${response.status} - ${errorText}`); + } + + const data = await response.json() as { audioContent: string }; + + // Google returns base64-encoded audio — decode to ArrayBuffer + const binaryString = atob(data.audioContent); + const bytes = new Uint8Array(binaryString.length); + for (let i = 0; i < binaryString.length; i++) { + bytes[i] = binaryString.charCodeAt(i); + } + return bytes.buffer; +} + +// Route TTS generation to the configured provider +async function generateSpeech( + text: string, + voiceId: string, + voiceSettings: ElevenLabsVoiceSettings +): Promise { + // Apply pronunciation replacements before sending to TTS + const pronouncedText = applyPronunciations(text); + if (pronouncedText !== text) { + console.log(`📖 Pronunciation: "${text}" → "${pronouncedText}"`); + } + + if (voiceConfig.ttsProvider === 'google-cloud') { + return generateSpeechGoogleCloud(pronouncedText, voiceConfig.googleVoice); + } + return generateSpeechElevenLabs(pronouncedText, voiceId, voiceSettings); +} + +// Detect platform and pick audio player +function getAudioPlayer(): { command: string; args: (file: string, volume: number) => string[] } { + const platform = process.platform; + + if (platform === 'darwin') { + return { + command: '/usr/bin/afplay', + args: (file, volume) => ['-v', volume.toString(), file], + }; + } + + // Linux: try mpv, ffplay, aplay in order + const linuxPlayers: Array<{ cmd: string; args: (file: string, volume: number) => string[] }> = [ + { cmd: 'mpv', args: (file, volume) => ['--no-video', `--volume=${Math.round(volume * 100)}`, file] }, + { cmd: 'ffplay', args: (file, _volume) => ['-nodisp', '-autoexit', '-loglevel', 'quiet', file] }, + { cmd: 'aplay', args: (file, _volume) => [file] }, + ]; + + for (const player of linuxPlayers) { + try { + const result = Bun.spawnSync(['which', player.cmd]); + if (result.exitCode === 0) { + return { command: player.cmd, args: player.args }; + } + } catch { /* try next */ } + } + + // Fallback to mpv (will error at runtime if not installed) + return { command: 'mpv', args: (file, volume) => ['--no-video', `--volume=${Math.round(volume * 100)}`, file] }; +} + +const audioPlayer = getAudioPlayer(); +console.log(`🔈 Audio player: ${audioPlayer.command} (${process.platform})`); + +// Play audio using platform-appropriate player async function playAudio(audioBuffer: ArrayBuffer, volume: number = FALLBACK_VOLUME): Promise { const tempFile = `/tmp/voice-${Date.now()}.mp3`; await Bun.write(tempFile, audioBuffer); return new Promise((resolve, reject) => { - const proc = spawn('/usr/bin/afplay', ['-v', volume.toString(), tempFile]); + const proc = spawn(audioPlayer.command, audioPlayer.args(tempFile, volume)); proc.on('error', (error) => { console.error('Error playing audio:', error); @@ -388,7 +519,7 @@ async function playAudio(audioBuffer: ArrayBuffer, volume: number = FALLBACK_VOL if (code === 0) { resolve(); } else { - reject(new Error(`afplay exited with code ${code}`)); + reject(new Error(`${audioPlayer.command} exited with code ${code}`)); } }); }); @@ -512,13 +643,20 @@ async function sendNotification( } } - // Display macOS notification (can be disabled via settings.json: notifications.desktop.enabled: false) + // Display desktop notification (can be disabled via settings.json: notifications.desktop.enabled: false) if (voiceConfig.desktopNotifications) { try { - const escapedTitle = escapeForAppleScript(safeTitle); - const escapedMessage = escapeForAppleScript(safeMessage); - const script = `display notification "${escapedMessage}" with title "${escapedTitle}" sound name ""`; - await spawnSafe('/usr/bin/osascript', ['-e', script]); + if (process.platform === 'darwin') { + const escapedTitle = escapeForAppleScript(safeTitle); + const escapedMessage = escapeForAppleScript(safeMessage); + const script = `display notification "${escapedMessage}" with title "${escapedTitle}" sound name ""`; + await spawnSafe('/usr/bin/osascript', ['-e', script]); + } else { + // Linux: try notify-send if available + try { + await spawnSafe('notify-send', [safeTitle, safeMessage]); + } catch { /* notify-send not available, skip silently */ } + } } catch (error) { console.error("Notification display error:", error); } @@ -688,9 +826,11 @@ const server = serve({ JSON.stringify({ status: "healthy", port: PORT, - voice_system: "ElevenLabs", + voice_system: voiceConfig.ttsProvider, default_voice_id: DEFAULT_VOICE_ID, - api_key_configured: !!ELEVENLABS_API_KEY, + elevenlabs_configured: !!ELEVENLABS_API_KEY, + google_cloud_configured: !!GOOGLE_CLOUD_API_KEY, + audio_player: audioPlayer.command, pronunciation_rules: pronunciationRules.length, configured_voices: Object.keys(voiceConfig.voices), }), @@ -709,8 +849,14 @@ const server = serve({ }); console.log(`🚀 Voice Server running on port ${PORT}`); -console.log(`🎙️ Using ElevenLabs TTS (default voice: ${DEFAULT_VOICE_ID})`); +console.log(`🔊 TTS provider: ${voiceConfig.ttsProvider}`); +if (voiceConfig.ttsProvider === 'google-cloud') { + console.log(`🎙️ Using Google Cloud TTS (voice: ${voiceConfig.googleVoice.voiceName}, type: ${voiceConfig.googleVoice.voiceType})`); + console.log(`🔑 Google Cloud API Key: ${GOOGLE_CLOUD_API_KEY ? '✅ Configured' : '❌ Missing'}`); +} else { + console.log(`🎙️ Using ElevenLabs TTS (default voice: ${DEFAULT_VOICE_ID})`); + console.log(`🔑 ElevenLabs API Key: ${ELEVENLABS_API_KEY ? '✅ Configured' : '❌ Missing'}`); +} console.log(`📡 POST to http://localhost:${PORT}/notify`); console.log(`🔒 Security: CORS restricted to localhost, rate limiting enabled`); -console.log(`🔑 API Key: ${ELEVENLABS_API_KEY ? '✅ Configured' : '❌ Missing'}`); console.log(`📖 Pronunciations: ${pronunciationRules.length} rules loaded`);