diff --git a/internal/site/components/microphone-button.tsx b/internal/site/components/microphone-button.tsx index f7ac78c2..dac1ff7f 100644 --- a/internal/site/components/microphone-button.tsx +++ b/internal/site/components/microphone-button.tsx @@ -6,6 +6,56 @@ import { toast } from "sonner"; import { LoaderIcon } from "./icons"; import { Button } from "./ui/button"; +// TypeScript declarations for Web Speech API +interface SpeechRecognitionEvent extends Event { + resultIndex: number; + results: SpeechRecognitionResultList; +} + +interface SpeechRecognitionErrorEvent extends Event { + error: string; + message?: string; +} + +interface SpeechRecognition extends EventTarget { + continuous: boolean; + interimResults: boolean; + lang: string; + start(): void; + stop(): void; + abort(): void; + onresult: ((event: SpeechRecognitionEvent) => void) | null; + onerror: ((event: SpeechRecognitionErrorEvent) => void) | null; + onend: (() => void) | null; + onstart: (() => void) | null; +} + +declare global { + interface Window { + SpeechRecognition?: new () => SpeechRecognition; + webkitSpeechRecognition?: new () => SpeechRecognition; + } +} + +/** + * Check if the Web Speech API is supported in the current browser. + */ +function isSpeechRecognitionSupported(): boolean { + if (typeof window === "undefined") return false; + return !!(window.SpeechRecognition || window.webkitSpeechRecognition); +} + +/** + * Create a SpeechRecognition instance if supported. + */ +function createSpeechRecognition(): SpeechRecognition | null { + if (typeof window === "undefined") return null; + const SpeechRecognitionClass = + window.SpeechRecognition || window.webkitSpeechRecognition; + if (!SpeechRecognitionClass) return null; + return new SpeechRecognitionClass(); +} + interface MicrophoneButtonProps { onTranscript: (text: string) => void; disabled?: boolean; @@ -18,137 +68,99 @@ export function MicrophoneButton({ onRecordingStateChange, }: MicrophoneButtonProps) { const [isRecording, setIsRecording] = useState(false); - const [isTranscribing, setIsTranscribing] = useState(false); - const mediaRecorderRef = useRef(null); - const streamRef = useRef(null); - const chunksRef = useRef([]); - - const stopRecording = useCallback(async () => { - if (mediaRecorderRef.current && isRecording) { - mediaRecorderRef.current.stop(); - setIsRecording(false); - } - if (streamRef.current) { - streamRef.current.getTracks().forEach((track) => track.stop()); - streamRef.current = null; + const [isSupported, setIsSupported] = useState(true); + const recognitionRef = useRef(null); + const transcriptRef = useRef(""); + + // Check for browser support on mount + useEffect(() => { + setIsSupported(isSpeechRecognitionSupported()); + }, []); + + const stopRecording = useCallback(() => { + if (recognitionRef.current) { + recognitionRef.current.stop(); } - }, [isRecording]); - - const getSupportedMimeType = () => { - const candidates = [ - "audio/webm;codecs=opus", - "audio/webm", - "audio/ogg;codecs=opus", - "audio/ogg", - "audio/mp4", - ]; - for (const type of candidates) { - try { - if ( - typeof window !== "undefined" && - "MediaRecorder" in window && - MediaRecorder.isTypeSupported(type) - ) { - return type; - } - } catch {} + }, []); + + const startRecording = useCallback(() => { + const recognition = createSpeechRecognition(); + if (!recognition) { + toast.error("Speech recognition is not supported in this browser."); + return; } - return undefined; - }; - const startRecording = useCallback(async () => { - try { - let stream: MediaStream | null = null; - try { - stream = await navigator.mediaDevices.getUserMedia({ - audio: { - echoCancellation: true, - noiseSuppression: true, - channelCount: 1, - }, - }); - } catch (err) { - stream = await navigator.mediaDevices.getUserMedia({ audio: true }); - } + recognitionRef.current = recognition; + transcriptRef.current = ""; - if (!stream) { - throw new Error("No audio stream available"); - } + // Configure recognition + recognition.continuous = true; + recognition.interimResults = true; + recognition.lang = navigator.language || "en-US"; - streamRef.current = stream; + recognition.onstart = () => { + setIsRecording(true); + }; - const mimeType = getSupportedMimeType(); - const mediaRecorder = new MediaRecorder( - stream, - mimeType ? { mimeType } : undefined - ); - mediaRecorderRef.current = mediaRecorder; - chunksRef.current = []; + recognition.onresult = (event: SpeechRecognitionEvent) => { + let finalTranscript = ""; - mediaRecorder.ondataavailable = (event) => { - if (event.data.size > 0) { - chunksRef.current.push(event.data); + for (let i = event.resultIndex; i < event.results.length; i++) { + const result = event.results[i]; + if (result && result[0]) { + if (result.isFinal) { + finalTranscript += result[0].transcript; + } } - }; + } - mediaRecorder.onstop = async () => { - const usedType = (mimeType ?? mediaRecorder.mimeType) || "audio/webm"; - const audioBlob = new Blob(chunksRef.current, { type: usedType }); - await transcribeAudio(audioBlob); - }; + if (finalTranscript) { + transcriptRef.current += finalTranscript; + } + }; - mediaRecorder.start(); - setIsRecording(true); - } catch (error: any) { - console.error("Error starting recording:", error); - let message = "Failed to start recording."; - if ( - error?.name === "NotAllowedError" || - error?.name === "SecurityError" - ) { + recognition.onerror = (event: SpeechRecognitionErrorEvent) => { + console.error("Speech recognition error:", event.error); + + // Don't show error for aborted (user stopped) or no-speech + if (event.error === "aborted" || event.error === "no-speech") { + return; + } + + let message = "Speech recognition failed. Please try again."; + if (event.error === "not-allowed") { message = - "Microphone access was blocked. Enable it in your browser/site settings."; - } else if ( - error?.name === "NotFoundError" || - error?.name === "DevicesNotFoundError" - ) { + "Microphone access was blocked. Enable it in your browser settings."; + } else if (event.error === "network") { + message = "Network error during speech recognition."; + } else if (event.error === "audio-capture") { message = "No microphone found. Check your input device."; - } else if (error?.name === "NotReadableError") { - message = "Microphone is in use by another application."; } + toast.error(message); - } - }, []); + }; - const transcribeAudio = useCallback( - async (audioBlob: Blob) => { - setIsTranscribing(true); - try { - const formData = new FormData(); - formData.append("audio", audioBlob); + recognition.onend = () => { + setIsRecording(false); - const response = await fetch("/api/speech-to-text", { - method: "POST", - body: formData, - }); + // Send the accumulated transcript + const finalText = transcriptRef.current.trim(); + if (finalText) { + onTranscript(finalText); + } - if (!response.ok) { - throw new Error("Transcription failed"); - } + recognitionRef.current = null; + transcriptRef.current = ""; + }; - const result = (await response.json()) as { text: string }; - if (result.text) { - onTranscript(result.text); - } - } catch (error) { - console.error("Transcription error:", error); - toast.error("Speech transcription failed. Please try again."); - } finally { - setIsTranscribing(false); - } - }, - [onTranscript] - ); + try { + recognition.start(); + } catch (error) { + console.error("Error starting speech recognition:", error); + toast.error("Failed to start speech recognition."); + setIsRecording(false); + } + }, [onTranscript]); const handleToggleRecording = useCallback(() => { if (isRecording) { @@ -162,22 +174,21 @@ export function MicrophoneButton({ onRecordingStateChange?.(isRecording); }, [isRecording, onRecordingStateChange]); + // Cleanup on unmount useEffect(() => { return () => { - if (streamRef.current) { - streamRef.current.getTracks().forEach((track) => track.stop()); + if (recognitionRef.current) { + recognitionRef.current.abort(); } }; }, []); + // Don't render the button if speech recognition is not supported + if (!isSupported) { + return null; + } + const getButtonIcon = () => { - if (isTranscribing) { - return ( -
- -
- ); - } if (isRecording) { return ; } @@ -192,7 +203,6 @@ export function MicrophoneButton({ }; const getTooltipText = () => { - if (isTranscribing) return "Processing speech..."; if (isRecording) return "Stop recording"; return "Start voice input"; }; @@ -204,7 +214,7 @@ export function MicrophoneButton({ isRecording ? "bg-red-600 hover:bg-red-700 border-red-600" : "" }`} onClick={handleToggleRecording} - disabled={disabled || isTranscribing} + disabled={disabled} variant={getButtonVariant()} title={getTooltipText()} >