diff --git a/package-lock.json b/package-lock.json
index f45b1e5..e35e83c 100644
--- a/package-lock.json
+++ b/package-lock.json
@@ -10,6 +10,7 @@
     "dependencies": {
       "@11labs/client": "^0.0.4",
       "@base-ui/react": "^1.2.0",
+      "@elevenlabs/react": "^0.14.1",
       "@radix-ui/react-accordion": "^1.2.12",
       "@radix-ui/react-dialog": "^1.1.15",
       "@radix-ui/react-separator": "^1.1.8",
@@ -1695,6 +1696,12 @@
         }
       }
     },
+    "node_modules/@bufbuild/protobuf": {
+      "version": "1.10.1",
+      "resolved": "https://registry.npmjs.org/@bufbuild/protobuf/-/protobuf-1.10.1.tgz",
+      "integrity": "sha512-wJ8ReQbHxsAfXhrf9ixl0aYbZorRuOWpBNzm8pL8ftmSxQx/wnJD5Eg861NwJU/czy2VXFIebCeZnZrI9rktIQ==",
+      "license": "(Apache-2.0 AND BSD-3-Clause)"
+    },
     "node_modules/@colors/colors": {
       "version": "1.6.0",
       "resolved": "https://registry.npmjs.org/@colors/colors/-/colors-1.6.0.tgz",
@@ -1863,6 +1870,33 @@
         "node": ">=18"
       }
     },
+    "node_modules/@elevenlabs/client": {
+      "version": "0.15.0",
+      "resolved": "https://registry.npmjs.org/@elevenlabs/client/-/client-0.15.0.tgz",
+      "integrity": "sha512-Fxoq+nZj6U9ykeQx07UGMFumw1hxE9Cr4O3IX3FfMjl3ZMRVMPfv8UOuqdoWMrLG6gZoIBeimfdc61St6z4JpQ==",
+      "license": "MIT",
+      "dependencies": {
+        "@elevenlabs/types": "0.6.0",
+        "livekit-client": "^2.11.4"
+      }
+    },
+    "node_modules/@elevenlabs/react": {
+      "version": "0.14.1",
+      "resolved": "https://registry.npmjs.org/@elevenlabs/react/-/react-0.14.1.tgz",
+      "integrity": "sha512-UmpArsuuEmGueKEzhaSEE2Pm9otr3HX6wSK8u3UW2eOCjjy0xLV3uaXAlaTquTNt+fqdILkm4PR+8DMUuabv8w==",
+      "license": "MIT",
+      "dependencies": {
+        "@elevenlabs/client": "0.15.0"
+      },
+      "peerDependencies": {
+        "react": ">=16.8.0"
+      }
+    },
+    "node_modules/@elevenlabs/types": {
+      "version": "0.6.0",
+      "resolved": "https://registry.npmjs.org/@elevenlabs/types/-/types-0.6.0.tgz",
+      "integrity": "sha512-2FihOiGf+KlTH1EYQoYsvnhSCMMaHZMDSXMYI25NUtv0NmWi86bB9Hp4BAHbNeEkCksPXKbPiX3b6KgP2JIg2g=="
+    },
     "node_modules/@emnapi/runtime": {
       "version": "1.8.1",
       "resolved": "https://registry.npmjs.org/@emnapi/runtime/-/runtime-1.8.1.tgz",
@@ -3178,6 +3212,21 @@
         "@jridgewell/sourcemap-codec": "^1.4.14"
       }
     },
+    "node_modules/@livekit/mutex": {
+      "version": "1.1.1",
+      "resolved": "https://registry.npmjs.org/@livekit/mutex/-/mutex-1.1.1.tgz",
+      "integrity": "sha512-EsshAucklmpuUAfkABPxJNhzj9v2sG7JuzFDL4ML1oJQSV14sqrpTYnsaOudMAw9yOaW53NU3QQTlUQoRs4czw==",
+      "license": "Apache-2.0"
+    },
+    "node_modules/@livekit/protocol": {
+      "version": "1.44.0",
+      "resolved": "https://registry.npmjs.org/@livekit/protocol/-/protocol-1.44.0.tgz",
+      "integrity": "sha512-/vfhDUGcUKO8Q43r6i+5FrDhl5oZjm/X3U4x2Iciqvgn5C8qbj+57YPcWSJ1kyIZm5Cm6AV2nAPjMm3ETD/iyg==",
+      "license": "Apache-2.0",
+      "dependencies": {
+        "@bufbuild/protobuf": "^1.10.0"
+      }
+    },
     "node_modules/@mapbox/node-pre-gyp": {
       "version": "2.0.3",
       "resolved": "https://registry.npmjs.org/@mapbox/node-pre-gyp/-/node-pre-gyp-2.0.3.tgz",
@@ -8612,6 +8661,13 @@
       "dev": true,
       "license": "MIT"
     },
+    "node_modules/@types/dom-mediacapture-record": {
+      "version": "1.0.22",
+      "resolved": "https://registry.npmjs.org/@types/dom-mediacapture-record/-/dom-mediacapture-record-1.0.22.tgz",
+      "integrity": "sha512-mUMZLK3NvwRLcAAT9qmcK+9p7tpU2FHdDsntR3YI4+GY88XrgG4XiE7u1Q2LAN2/FZOz/tdMDC3GQCR4T8nFuw==",
+      "license": "MIT",
+      "peer": true
+    },
     "node_modules/@types/draco3d": {
       "version": "1.4.10",
       "resolved": "https://registry.npmjs.org/@types/draco3d/-/draco3d-1.4.10.tgz",
@@ -13025,7 +13081,6 @@
       "version": "3.3.0",
       "resolved": "https://registry.npmjs.org/events/-/events-3.3.0.tgz",
       "integrity": "sha512-mQw+2fkQbALzQ7V0MY0IqdnXNOeTtP4r0lN9z7AAawCXgqea7bDii20AYrIBrFd/Hx0M2Ocz6S111CaFkUcb0Q==",
-      "dev": true,
       "license": "MIT",
       "engines": {
         "node": ">=0.8.x"
@@ -15309,6 +15364,15 @@
         "jiti": "bin/jiti.js"
       }
     },
+    "node_modules/jose": {
+      "version": "6.2.1",
+      "resolved": "https://registry.npmjs.org/jose/-/jose-6.2.1.tgz",
+      "integrity": "sha512-jUaKr1yrbfaImV7R2TN/b3IcZzsw38/chqMpo2XJ7i2F8AfM/lA4G1goC3JVEwg0H7UldTmSt3P68nt31W7/mw==",
+      "license": "MIT",
+      "funding": {
+        "url": "https://github.com/sponsors/panva"
+      }
+    },
     "node_modules/jpeg-js": {
       "version": "0.4.4",
       "resolved": "https://registry.npmjs.org/jpeg-js/-/jpeg-js-0.4.4.tgz",
@@ -15773,6 +15837,27 @@
       "dev": true,
       "license": "MIT"
     },
+    "node_modules/livekit-client": {
+      "version": "2.17.2",
+      "resolved": "https://registry.npmjs.org/livekit-client/-/livekit-client-2.17.2.tgz",
+      "integrity": "sha512-+67y2EtAWZabARlY7kANl/VT1Uu1EJYR5a8qwpT2ub/uBCltsEgEDOxCIMwE9HFR5w+z41HR6GL9hyEvW/y6CQ==",
+      "license": "Apache-2.0",
+      "dependencies": {
+        "@livekit/mutex": "1.1.1",
+        "@livekit/protocol": "1.44.0",
+        "events": "^3.3.0",
+        "jose": "^6.1.0",
+        "loglevel": "^1.9.2",
+        "sdp-transform": "^2.15.0",
+        "ts-debounce": "^4.0.0",
+        "tslib": "2.8.1",
+        "typed-emitter": "^2.1.0",
+        "webrtc-adapter": "^9.0.1"
+      },
+      "peerDependencies": {
+        "@types/dom-mediacapture-record": "^1"
+      }
+    },
     "node_modules/locate-path": {
       "version": "6.0.0",
       "resolved": "https://registry.npmjs.org/locate-path/-/locate-path-6.0.0.tgz",
@@ -19489,6 +19574,16 @@
         "queue-microtask": "^1.2.2"
       }
     },
+    "node_modules/rxjs": {
+      "version": "7.8.2",
+      "resolved": "https://registry.npmjs.org/rxjs/-/rxjs-7.8.2.tgz",
+      "integrity": "sha512-dhKf903U/PQZY6boNNtAGdWbG85WAbjT/1xYoZIC7FAY0yWapOBQVsVrDl58W86//e1VpMNBtRV4MaXfdMySFA==",
+      "license": "Apache-2.0",
+      "optional": true,
+      "dependencies": {
+        "tslib": "^2.1.0"
+      }
+    },
     "node_modules/safe-array-concat": {
       "version": "1.1.3",
       "resolved": "https://registry.npmjs.org/safe-array-concat/-/safe-array-concat-1.1.3.tgz",
@@ -19610,6 +19705,21 @@
         "loose-envify": "^1.1.0"
       }
     },
+    "node_modules/sdp": {
+      "version": "3.2.1",
+      "resolved": "https://registry.npmjs.org/sdp/-/sdp-3.2.1.tgz",
+      "integrity": "sha512-lwsAIzOPlH8/7IIjjz3K0zYBk7aBVVcvjMwt3M4fLxpjMYyy7i3I97SLHebgn4YBjirkzfp3RvRDWSKsh/+WFw==",
+      "license": "MIT"
+    },
+    "node_modules/sdp-transform": {
+      "version": "2.15.0",
+      "resolved": "https://registry.npmjs.org/sdp-transform/-/sdp-transform-2.15.0.tgz",
+      "integrity": "sha512-KrOH82c/W+GYQ0LHqtr3caRpM3ITglq3ljGUIb8LTki7ByacJZ9z+piSGiwZDsRyhQbYBOBJgr2k6X4BZXi3Kw==",
+      "license": "MIT",
+      "bin": {
+        "sdp-verify": "checker.js"
+      }
+    },
     "node_modules/semver": {
       "version": "6.3.1",
       "resolved": "https://registry.npmjs.org/semver/-/semver-6.3.1.tgz",
@@ -21053,6 +21163,12 @@
         "typescript": ">=4.8.4"
       }
     },
+    "node_modules/ts-debounce": {
+      "version": "4.0.0",
+      "resolved": "https://registry.npmjs.org/ts-debounce/-/ts-debounce-4.0.0.tgz",
+      "integrity": "sha512-+1iDGY6NmOGidq7i7xZGA4cm8DAa6fqdYcvO5Z6yBevH++Bdo9Qt/mN0TzHUgcCcKv1gmh9+W5dHqz8pMWbCbg==",
+      "license": "MIT"
+    },
     "node_modules/ts-interface-checker": {
       "version": "0.1.13",
       "resolved": "https://registry.npmjs.org/ts-interface-checker/-/ts-interface-checker-0.1.13.tgz",
@@ -21186,6 +21302,15 @@
         "url": "https://github.com/sponsors/ljharb"
       }
     },
+    "node_modules/typed-emitter": {
+      "version": "2.1.0",
+      "resolved": "https://registry.npmjs.org/typed-emitter/-/typed-emitter-2.1.0.tgz",
+      "integrity": "sha512-g/KzbYKbH5C2vPkaXGu8DJlHrGKHLsM25Zg9WuC9pMGfuvT+X25tZQWo5fK1BjBm8+UrVE9LDCvaY0CQk+fXDA==",
+      "license": "MIT",
+      "optionalDependencies": {
+        "rxjs": "*"
+      }
+    },
     "node_modules/typescript": {
       "version": "5.9.3",
       "resolved": "https://registry.npmjs.org/typescript/-/typescript-5.9.3.tgz",
@@ -22560,6 +22685,19 @@
         "node": ">=20"
       }
     },
+    "node_modules/webrtc-adapter": {
+      "version": "9.0.4",
+      "resolved": "https://registry.npmjs.org/webrtc-adapter/-/webrtc-adapter-9.0.4.tgz",
+      "integrity": "sha512-5ZZY1+lGq8LEKuDlg9M2RPJHlH3R7OVwyHqMcUsLKCgd9Wvf+QrFTCItkXXYPmrJn8H6gRLXbSgxLLdexiqHxw==",
+      "license": "BSD-3-Clause",
+      "dependencies": {
+        "sdp": "^3.2.0"
+      },
+      "engines": {
+        "node": ">=6.0.0",
+        "npm": ">=3.10.0"
+      }
+    },
     "node_modules/whatwg-encoding": {
       "version": "3.1.1",
       "resolved": "https://registry.npmjs.org/whatwg-encoding/-/whatwg-encoding-3.1.1.tgz",
diff --git a/package.json b/package.json
index ca71b95..085360c 100644
--- a/package.json
+++ b/package.json
@@ -13,6 +13,7 @@
   "dependencies": {
     "@11labs/client": "^0.0.4",
     "@base-ui/react": "^1.2.0",
+    "@elevenlabs/react": "^0.14.1",
     "@radix-ui/react-accordion": "^1.2.12",
     "@radix-ui/react-dialog": "^1.1.15",
     "@radix-ui/react-separator": "^1.1.8",
diff --git a/src/components/DemoCall/UserPhoneInterface.tsx b/src/components/DemoCall/UserPhoneInterface.tsx
index 73e60ee..5abf319 100644
--- a/src/components/DemoCall/UserPhoneInterface.tsx
+++ b/src/components/DemoCall/UserPhoneInterface.tsx
@@ -1,11 +1,9 @@
 import { useState, useEffect, useRef, useCallback } from 'react';
 import { motion, AnimatePresence } from 'framer-motion';
-import { Phone, PhoneOff, Volume2, VolumeX, Minimize2, Maximize2, Globe, ArrowLeft } from 'lucide-react';
+import { Phone, PhoneOff, Volume2, VolumeX, Minimize2, Maximize2, ArrowLeft } from 'lucide-react';
+import { useConversation } from '@elevenlabs/react';
 import { cn } from '../../utils/cn';
 import { useDemoCall } from '../../contexts/DemoCallContext';
-import { aiService } from '../../services/aiService';
-import { ttsService, TTSLanguage, TTS_LANGUAGES } from '../../services/ttsService';
-import { getGroqSettings, type VoiceProviderType } from './GroqSettings';
 
 interface UserPhoneInterfaceProps {
   isDark?: boolean;
@@ -16,25 +14,9 @@
   onBack?: () => void;
 }
 
-// Speech Recognition types - use any to avoid conflicts with other declarations
-// eslint-disable-next-line @typescript-eslint/no-explicit-any
-type SpeechRecognitionInstance = any;
-
-interface SpeechRecognitionEvent {
-  resultIndex: number;
-  // eslint-disable-next-line @typescript-eslint/no-explicit-any
-  results: any;
-}
-
-interface SpeechRecognitionErrorEvent {
-  error: string;
-}
-
 export default function UserPhoneInterface({
   isDark = true,
   onTranscript,
-  // eslint-disable-next-line @typescript-eslint/no-unused-vars
-  mode = 'standalone',
   autoStart = false,
   onBack
@@ -44,10 +26,6 @@ export default function UserPhoneInterface({
     startCall,
     endCall,
     addMessage,
-    knowledgeBase,
-    updateExtractedField,
-    setCallPriority,
-    setCallCategory
   } = useDemoCall();
 
   const [callDuration, setCallDuration] = useState(0);
@@ -55,49 +33,50 @@ export default function UserPhoneInterface({
   const [currentTranscript, setCurrentTranscript] = useState('');
   const [lastAgentMessage, setLastAgentMessage] = useState('');
   const [agentStatus, setAgentStatus] = useState<'idle' | 'speaking' | 'listening' | 'processing'>('idle');
-  const [language, setLanguage] = useState<TTSLanguage>('en');
   const [isMinimized, setIsMinimized] = useState(false);
   const [isEnding, setIsEnding] = useState(false);
-  const [, setVoiceProvider] = useState<VoiceProviderType>(() => getGroqSettings().voiceProvider);
+  const [connectionError, setConnectionError] = useState<string | null>(null);
 
-  const recognitionRef = useRef<SpeechRecognitionInstance | null>(null);
   const timerRef = useRef<NodeJS.Timeout | null>(null);
-  const isRecognitionRunningRef = useRef(false);
-
-  // Refs to avoid stale closures in speech recognition callbacks
-  const currentCallRef = useRef(currentCall);
-  const agentStatusRef = useRef(agentStatus);
-  // eslint-disable-next-line @typescript-eslint/no-explicit-any
-  const handleUserInputRef = useRef<((text: string) => Promise<void>) | null>(null);
-
-  // Keep refs in sync with state
-  useEffect(() => { currentCallRef.current = currentCall; }, [currentCall]);
-  useEffect(() => { agentStatusRef.current = agentStatus; }, [agentStatus]);
-  // Note: handleUserInputRef is synced after handleUserInput is defined below
-
-  const messages = currentCall?.messages || [];
-  const extractedFields = currentCall?.extractedFields || [];
-
-  // Initialize AI service with knowledge base and sync voice provider
-  useEffect(() => {
-    aiService.setKnowledgeBase(knowledgeBase);
-  }, [knowledgeBase]);
-
-  // Sync voice provider from settings changes
-  useEffect(() => {
-    const handleSettingsChange = (e: Event) => {
-      const detail = (e as CustomEvent).detail;
-      if (detail?.voiceProvider) {
-        setVoiceProvider(detail.voiceProvider);
-        ttsService.setVoiceProvider(detail.voiceProvider);
+  // ElevenLabs Conversational AI — handles STT + LLM + TTS in one WebSocket
+  const conversation = useConversation({
+    onConnect: () => {
+      console.log('📞 ElevenLabs Conversational AI connected');
+      setConnectionError(null);
+      setAgentStatus('listening');
+    },
+    onDisconnect: () => {
+      console.log('📞 ElevenLabs Conversational AI disconnected');
+      setAgentStatus('idle');
+    },
+    onMessage: (message: { source: string; message: string }) => {
+      console.log(`📞 [${message.source}]: ${message.message}`);
+      if (message.source === 'user') {
+        setCurrentTranscript(message.message);
+        addMessage('user', message.message);
+        onTranscript?.(message.message, 'user');
+      } else if (message.source === 'ai') {
+        setCurrentTranscript('');
+        setLastAgentMessage(message.message);
+        addMessage('agent', message.message);
+        onTranscript?.(message.message, 'agent');
       }
-    };
-    window.addEventListener('groq-settings-updated', handleSettingsChange);
-    // Sync on mount
-    const settings = getGroqSettings();
-    ttsService.setVoiceProvider(settings.voiceProvider);
-    return () => window.removeEventListener('groq-settings-updated', handleSettingsChange);
-  }, []);
+    },
+    onError: (error: string) => {
+      console.error('📞 ElevenLabs error:', error);
+      setConnectionError(typeof error === 'string' ? error : 'Connection error');
+    },
+    onModeChange: (mode: { mode: string }) => {
+      console.log('📞 Mode:', mode.mode);
+      if (mode.mode === 'speaking') {
+        setAgentStatus('speaking');
+        setCurrentTranscript('');
+      } else if (mode.mode === 'listening') {
+        setAgentStatus('listening');
+      }
+    },
+  });
 
   // Timer for call duration
   useEffect(() => {
@@ -120,151 +99,13 @@ export default function UserPhoneInterface({
     };
   }, [currentCall?.status]);
 
-  // Auto-start recognition when call becomes active (for overlay mode when started externally)
-  useEffect(() => {
-    if (currentCall?.status === 'active' && currentCall?.type === 'voice' && mode === 'overlay') {
-      // Small delay to ensure component is ready
-      const timer = setTimeout(() => {
-        if (agentStatus === 'idle') {
-          console.log('📞 Call started externally, starting recognition in overlay mode');
-          setAgentStatus('listening');
-        }
-        if (!isRecognitionRunningRef.current) {
-          try {
-            recognitionRef.current?.start();
-            isRecognitionRunningRef.current = true;
-            console.log('🎤 Recognition started for external call');
-          } catch (e) {
-            console.error('Error auto-starting recognition:', e);
-          }
-        }
-      }, 200);
-      return () => clearTimeout(timer);
-    }
-  }, [currentCall?.status, currentCall?.type, mode, agentStatus]);
-
-  // Helper functions for starting/stopping recognition safely
-  const startRecognition = useCallback(() => {
-    if (!recognitionRef.current || isRecognitionRunningRef.current) {
-      console.log('🎤 Recognition already running or not available');
-      return;
-    }
-    try {
-      console.log('🎤 Starting speech recognition...');
-      recognitionRef.current.start();
-      isRecognitionRunningRef.current = true;
-    } catch (e) {
-      console.error('Error starting recognition:', e);
-      isRecognitionRunningRef.current = false;
-    }
-  }, []);
-
-  const stopRecognition = useCallback(() => {
-    if (!recognitionRef.current) return;
-    try {
-      recognitionRef.current.stop();
-      isRecognitionRunningRef.current = false;
-    } catch (e) {
-      console.error('Error stopping recognition:', e);
-    }
-  }, []);
-
-  // Initialize speech recognition
+  // Sync volume with speaker toggle
   useEffect(() => {
-    // eslint-disable-next-line @typescript-eslint/no-explicit-any
-    const SpeechRecognition = (window as any).SpeechRecognition || (window as any).webkitSpeechRecognition;
-    if (SpeechRecognition) {
-      // Cleanup previous instance if it exists
-      if (recognitionRef.current) {
-        try {
-          recognitionRef.current.abort();
-        } catch (e) {
-          // Ignore
-        }
-      }
-
-      console.log(`🎤 Initializing speech recognition for language: ${language} (${TTS_LANGUAGES[language].code})`);
-
-      const recognition = new SpeechRecognition();
-      recognition.continuous = true;
-      recognition.interimResults = true;
-      recognition.lang = TTS_LANGUAGES[language].code;
-      recognitionRef.current = recognition;
-
-      recognition.onresult = (event: SpeechRecognitionEvent) => {
-        const current = event.resultIndex;
-        const transcriptText = event.results[current][0].transcript;
-        setCurrentTranscript(transcriptText);
-
-        if (event.results[current].isFinal) {
-          // Use ref to get latest handleUserInput callback
-          handleUserInputRef.current?.(transcriptText);
-        }
-      };
-
-      recognition.onerror = (event: SpeechRecognitionErrorEvent) => {
-        if (event.error !== 'no-speech' && event.error !== 'aborted') {
-          console.error('Speech recognition error:', event.error);
-        }
-
-        isRecognitionRunningRef.current = false;
-
-        if (event.error === 'not-allowed') {
-          setAgentStatus('idle');
-          alert('Please allow microphone access.');
-        }
-      };
-
-      recognition.onend = () => {
-        isRecognitionRunningRef.current = false;
-        // Restart if still in call and listening - use refs for latest values
-        const isActive = currentCallRef.current?.status === 'active';
-        const status = agentStatusRef.current;
-
-        console.log('🎤 Speech recognition ended:', { isActive, status, language });
-
-        if (isActive && status === 'listening') {
-          // Small delay to prevent rapid restart issues
-          setTimeout(() => {
-            if (currentCallRef.current?.status === 'active' && agentStatusRef.current === 'listening') {
-              try {
-                console.log('🎤 Restarting speech recognition...');
-                recognitionRef.current?.start();
-                isRecognitionRunningRef.current = true;
-              } catch (e) {
-                console.error('Error restarting recognition:', e);
-                isRecognitionRunningRef.current = false;
-              }
-            }
-          }, 300);
-        }
-      };
-
-      // If we are already in listening mode (e.g. after language switch), restart immediately
-      if (agentStatus === 'listening') {
-        try {
-          console.log('🎤 Auto-restarting recognition after language switch');
-          recognition.start();
-          isRecognitionRunningRef.current = true;
-        } catch (e) {
-          console.warn('Could not auto-start recognition:', e);
-        }
-      }
+    if (conversation.status === 'connected') {
+      conversation.setVolume({ volume: isSpeakerOn ? 1 : 0 });
     }
+  }, [isSpeakerOn, conversation.status]);
 
-    return () => {
-      if (recognitionRef.current) {
-        try {
-          recognitionRef.current.abort();
-        } catch (e) {
-          // Ignore
-        }
-        isRecognitionRunningRef.current = false;
-      }
-    };
-  }, [language]);
-
-  // Helper for time formatting
   const formatDuration = (seconds: number) => {
     const mins = Math.floor(seconds / 60);
     const secs = seconds % 60;
@@ -273,237 +114,72 @@ export default function UserPhoneInterface({
 
   const isActive = currentCall?.status === 'active';
 
-  // System Status Checks
-  const [systemChecks, setSystemChecks] = useState<{ label: string; status: 'pending' | 'success' | 'warning' | 'error' }[]>([]);
-
-  useEffect(() => {
-    const runChecks = async () => {
-      // Initial state
-      setSystemChecks([
-        { label: 'Initializing Neural Engine', status: 'pending' },
-      ]);
-
-      await new Promise(r => setTimeout(r, 600));
-
-      setSystemChecks(prev => [
-        { ...prev[0], status: 'success' },
-        { label: 'Connecting to Knowledge Base', status: 'pending' }
-      ]);
-
-      await new Promise(r => setTimeout(r, 800));
-
-      // Check ElevenLabs
-      const elevenLabsAvailable = ttsService.isElevenLabsAvailable();
-
-      setSystemChecks(prev => [
-        prev[0],
-        { ...prev[1], status: 'success' },
-        { label: 'Verifying Voice Synthesis (DE)', status: 'pending' }
-      ]);
-
-      await new Promise(r => setTimeout(r, 800));
-
-      setSystemChecks(prev => [
-        prev[0],
-        prev[1],
-        {
-          label: elevenLabsAvailable ? 'Voice Synthesis Active (ElevenLabs)' : 'German Voice Unavailable (No Key)',
-          status: elevenLabsAvailable ? 'success' : 'warning'
-        },
-        { label: 'System Ready', status: 'success' }
-      ]);
-    };
-
-    if (!isActive) {
-      runChecks();
-    }
-  }, [isActive]);
-
-
-  const speakText = useCallback((text: string) => {
-    if (!isSpeakerOn) return Promise.resolve();
-
-    // Check if German is requested but unavailable
-    if (language === 'de' && !ttsService.isElevenLabsAvailable()) {
-      console.warn('🇩🇪 German TTS unavailable (Missing API Key). Skipping audio.');
-
-      // Just simulate the timing of speech or jump straight to listening
-      setAgentStatus('speaking');
-      setTimeout(() => {
-        setAgentStatus('listening');
-        startRecognition();
-      }, 1000);
-
-      return Promise.resolve();
-    }
-
-    // Get the selected voice from knowledge base
-    const selectedVoice = ttsService.resolveOrpheusVoiceId(knowledgeBase.selectedVoiceId);
-
-    // Stop recognition before speaking to avoid conflicts
-    stopRecognition();
-
-    return ttsService.speakStreaming(text, {
-      orpheusVoice: selectedVoice,
-      speed: 1.0,
-      language, // Use selected language (en = Groq, de = ElevenLabs)
-      onStart: () => setAgentStatus('speaking'),
-      onEnd: () => {
-        setAgentStatus('listening');
-        // Start listening after speaking
-        startRecognition();
-      },
-      onError: (error) => {
-        console.error('TTS error:', error);
-        setAgentStatus('listening');
-        startRecognition();
-      }
+  // Fetch signed URL from our Netlify function (keeps API key server-side)
+  const getSignedUrl = useCallback(async (): Promise<string> => {
+    const response = await fetch('/api/elevenlabs-signed-url', {
+      method: 'POST',
+      headers: { 'Content-Type': 'application/json' },
     });
-  }, [knowledgeBase.selectedVoiceId, isSpeakerOn, language, startRecognition, stopRecognition]);
-
-  const handleUserInput = useCallback(async (text: string) => {
-    if (!text.trim()) return;
-
-    addMessage('user', text);
-    // ... (rest of function is fine)
-    onTranscript?.(text, 'user');
-    setCurrentTranscript('');
-    setAgentStatus('processing');
-
-    // Stop recognition while processing
-    stopRecognition();
 
-    // Use AI service to generate response
-    const response = await aiService.generateResponse(
-      text,
-      messages,
-      extractedFields
-    );
-
-    // Update extracted fields
-    if (response.extractedFields) {
-      response.extractedFields.forEach(field => {
-        updateExtractedField(field);
-      });
+    if (!response.ok) {
+      const errorData = await response.json().catch(() => ({ error: `HTTP ${response.status}` }));
+      throw new Error(errorData.error || `HTTP ${response.status}`);
     }
 
-    // Update priority if suggested
-    if (response.suggestedPriority) {
-      setCallPriority(response.suggestedPriority);
+    const data = await response.json();
+    if (!data.signed_url) {
+      throw new Error('No signed_url in response');
     }
 
-    // Update category if suggested
-    if (response.suggestedCategory) {
-      setCallCategory(response.suggestedCategory);
-    }
-
-    addMessage('agent', response.text);
-    setLastAgentMessage(response.text);
-    onTranscript?.(response.text, 'agent');
-    await speakText(response.text);
-  }, [addMessage, onTranscript, messages, extractedFields, updateExtractedField, setCallPriority, setCallCategory, speakText, stopRecognition]);
-
-  // Keep handleUserInputRef in sync (must be after handleUserInput is defined)
-  useEffect(() => { handleUserInputRef.current = handleUserInput; }, [handleUserInput]);
+    return data.signed_url;
+  }, []);
 
   const handleStartCall = useCallback(async () => {
-    // Unlock audio playback (required by browser autoplay policies)
-    // This must happen during the user gesture (click)
-    await ttsService.unlockAudio();
-
-    // Reset AI conversation state for new call
-    aiService.resetState();
-
+    setConnectionError(null);
+    setIsEnding(false);
     startCall('voice');
+    setAgentStatus('processing');
 
-    //
-    // Both DeAPI and ElevenLabs use the same flow:
-    // Browser Speech Recognition → Groq LLM → selected TTS provider
-    // The ttsService.speak() routes to the correct TTS backend automatically
-    //
-    // Speak a greeting based on selected language
-    const greeting = language === 'de'
-      ? 'Hallo! Wie kann ich Ihnen helfen?'
-      : 'Hello! How can I help you today?';
-
-    setLastAgentMessage(greeting);
-    addMessage('agent', greeting);
-    setAgentStatus('speaking');
-
-    // Check if we can speak German
-    if (language === 'de' && !ttsService.isElevenLabsAvailable()) {
-      // Fake it for the greeting if key missing
-      setTimeout(() => {
-        setAgentStatus('listening');
-        startRecognition();
-      }, 1000);
-      return;
-    }
-
-    // Use TTS for the greeting
     try {
-      // Get selected voice
-      const selectedVoice = ttsService.resolveOrpheusVoiceId(knowledgeBase.selectedVoiceId);
-
-      await ttsService.speak(greeting, {
-        orpheusVoice: selectedVoice,
-        speed: 1.0,
-        language, // Use selected language
-        onStart: () => {
-          console.log('🔊 TTS greeting started');
-        },
-        onEnd: () => {
-          console.log('🔊 Initial greeting finished, starting recognition');
-          setAgentStatus('listening');
-          startRecognition();
-        },
-        onError: (error) => {
-          console.error('🔊 Greeting TTS error:', error);
-          setAgentStatus('listening');
-          startRecognition();
-        }
+      // Get signed URL from our server (keeps API key safe)
+      const signedUrl = await getSignedUrl();
+
+      // Start ElevenLabs Conversational AI session
+      // This single WebSocket connection handles STT + LLM + TTS
+      await conversation.startSession({
+        signedUrl,
       });
     } catch (error) {
-      console.error('🔊 Greeting TTS failed:', error);
-      setAgentStatus('listening');
-      startRecognition();
+      console.error('📞 Failed to start ElevenLabs session:', error);
+      setConnectionError(error instanceof Error ? error.message : 'Failed to connect');
+      setAgentStatus('idle');
     }
-  }, [startCall, startRecognition, language, addMessage, onTranscript]);
-
-  // No longer auto-start — we show a "Start Call" button instead
-  // to ensure AudioContext is created during a real user gesture
+  }, [startCall, getSignedUrl, conversation]);
 
   const handleEndCall = useCallback(async () => {
     console.log('🔴 End call button pressed');
     setIsEnding(true);
 
-    // Stop all TTS immediately
-    ttsService.stop();
-
-    // Stop speech recognition
-    stopRecognition();
-    if (recognitionRef.current) {
-      recognitionRef.current.abort();
-      isRecognitionRunningRef.current = false;
-    }
+    // End ElevenLabs session
+    await conversation.endSession();
 
     setAgentStatus('idle');
-
-    // Wait for AI summary to be generated before ending
     await endCall();
     setCurrentTranscript('');
    setLastAgentMessage('');
+  }, [endCall, conversation]);
 
-    // Final TTS cleanup after a short delay
-    setTimeout(() => {
-      ttsService.stop();
-    }, 100);
-  }, [endCall, stopRecognition]);
-
-
+  // Cleanup on unmount
+  useEffect(() => {
+    return () => {
+      if (conversation.status === 'connected') {
+        conversation.endSession();
+      }
+    };
+  }, []);
 
-  // ... (Compact View - Unchanged)
+  // Compact minimized view
   if (isActive && isMinimized && currentCall?.type === 'voice') {
     return (
@@ -546,9 +222,8 @@ export default function UserPhoneInterface({
     );
   }
 
-  // ... (Full Screen View - Unchanged mainly)
+  // Full screen active call view
   if (isActive && currentCall?.type === 'voice') {
-    // ... (existing code)
     return (
         {/* Background Layer */}
@@ -607,7 +282,7 @@ export default function UserPhoneInterface({
                   AI
-              {/* Status Rings - Simplified and aligned */}
+              {/* Status Rings */}
               {agentStatus === 'listening' && (
 
-            {/* Caption Box - Darker Glassy */}
+            {/* Caption Box */}
             {(currentTranscript || lastAgentMessage) && (
@@ -663,27 +338,10 @@ export default function UserPhoneInterface({
 
-          {/* Bottom Controls - Fully aligned */}
+          {/* Bottom Controls */}
 
-          {/* Language Toggle */}
-
-          {/* Speaker */}
-          {/* End Call - Glassy Red */}
+          {/* End Call */}
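
Note: the serverless endpoint that getSignedUrl() calls is not included in this diff. As a rough sketch only, assuming a Netlify function named elevenlabs-signed-url, a redirect mapping /api/* to /.netlify/functions/*, and ELEVENLABS_API_KEY / ELEVENLABS_AGENT_ID environment variables (all of these names are illustrative, not taken from the PR), the server side might look like this:

// netlify/functions/elevenlabs-signed-url.ts — illustrative sketch, not part of this PR
import type { Handler } from '@netlify/functions';

export const handler: Handler = async () => {
  const apiKey = process.env.ELEVENLABS_API_KEY;   // assumed env var
  const agentId = process.env.ELEVENLABS_AGENT_ID; // assumed env var
  if (!apiKey || !agentId) {
    return { statusCode: 500, body: JSON.stringify({ error: 'Missing ElevenLabs configuration' }) };
  }

  // Exchange the server-side API key for a short-lived signed WebSocket URL.
  // Endpoint path follows the ElevenLabs Conversational AI docs at time of
  // writing; verify it against the current API reference.
  const res = await fetch(
    `https://api.elevenlabs.io/v1/convai/conversation/get_signed_url?agent_id=${agentId}`,
    { headers: { 'xi-api-key': apiKey } }
  );
  if (!res.ok) {
    return { statusCode: res.status, body: JSON.stringify({ error: `ElevenLabs returned ${res.status}` }) };
  }

  // Response shape matches what getSignedUrl() in UserPhoneInterface.tsx expects.
  const { signed_url } = (await res.json()) as { signed_url: string };
  return { statusCode: 200, body: JSON.stringify({ signed_url }) };
};

For a public agent, conversation.startSession({ agentId }) would avoid the round trip entirely; the signed-URL flow is what keeps a private agent's API key off the client.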