diff --git a/app/frontend/src/api/models.ts b/app/frontend/src/api/models.ts index 3ac5590bf5..c8d264e362 100644 --- a/app/frontend/src/api/models.ts +++ b/app/frontend/src/api/models.ts @@ -92,3 +92,11 @@ export type Config = { export type SimpleAPIResponse = { message?: string; }; + +export interface SpeechConfig { + speechUrls: (string | null)[]; + setSpeechUrls: (urls: (string | null)[]) => void; + audio: HTMLAudioElement; + isPlaying: boolean; + setIsPlaying: (isPlaying: boolean) => void; +} diff --git a/app/frontend/src/components/Answer/Answer.tsx b/app/frontend/src/components/Answer/Answer.tsx index c6d77748f2..22f182f64d 100644 --- a/app/frontend/src/components/Answer/Answer.tsx +++ b/app/frontend/src/components/Answer/Answer.tsx @@ -6,7 +6,7 @@ import remarkGfm from "remark-gfm"; import rehypeRaw from "rehype-raw"; import styles from "./Answer.module.css"; -import { ChatAppResponse, getCitationFilePath } from "../../api"; +import { ChatAppResponse, getCitationFilePath, SpeechConfig } from "../../api"; import { parseAnswerToHtml } from "./AnswerParser"; import { AnswerIcon } from "./AnswerIcon"; import { SpeechOutputBrowser } from "./SpeechOutputBrowser"; @@ -14,6 +14,8 @@ import { SpeechOutputAzure } from "./SpeechOutputAzure"; interface Props { answer: ChatAppResponse; + index: number; + speechConfig: SpeechConfig; isSelected?: boolean; isStreaming: boolean; onCitationClicked: (filePath: string) => void; @@ -23,11 +25,12 @@ interface Props { showFollowupQuestions?: boolean; showSpeechOutputBrowser?: boolean; showSpeechOutputAzure?: boolean; - speechUrl: string | null; } export const Answer = ({ answer, + index, + speechConfig, isSelected, isStreaming, onCitationClicked, @@ -36,13 +39,11 @@ export const Answer = ({ onFollowupQuestionClicked, showFollowupQuestions, showSpeechOutputAzure, - showSpeechOutputBrowser, - speechUrl + showSpeechOutputBrowser }: Props) => { const followupQuestions = answer.context?.followup_questions; const messageContent = answer.message.content; const parsedAnswer = useMemo(() => parseAnswerToHtml(messageContent, isStreaming, onCitationClicked), [answer]); - const sanitizedAnswerHtml = DOMPurify.sanitize(parsedAnswer.answerHtml); return ( @@ -67,7 +68,9 @@ export const Answer = ({ onClick={() => onSupportingContentClicked()} disabled={!answer.context.data_points} /> - {showSpeechOutputAzure && } + {showSpeechOutputAzure && ( + + )} {showSpeechOutputBrowser && } diff --git a/app/frontend/src/components/Answer/SpeechOutputAzure.tsx b/app/frontend/src/components/Answer/SpeechOutputAzure.tsx index 214b5a0191..61087c3659 100644 --- a/app/frontend/src/components/Answer/SpeechOutputAzure.tsx +++ b/app/frontend/src/components/Answer/SpeechOutputAzure.tsx @@ -1,44 +1,80 @@ import { useState } from "react"; import { IconButton } from "@fluentui/react"; +import { getSpeechApi, SpeechConfig } from "../../api"; interface Props { - url: string | null; + answer: string; + speechConfig: SpeechConfig; + index: number; + isStreaming: boolean; } -let audio = new Audio(); +export const SpeechOutputAzure = ({ answer, speechConfig, index, isStreaming }: Props) => { + const [isLoading, setIsLoading] = useState(false); + const [localPlayingState, setLocalPlayingState] = useState(false); -export const SpeechOutputAzure = ({ url }: Props) => { - const [isPlaying, setIsPlaying] = useState(false); + const playAudio = async (url: string) => { + speechConfig.audio.src = url; + await speechConfig.audio + .play() + .then(() => { + speechConfig.audio.onended = () => { + speechConfig.setIsPlaying(false); + setLocalPlayingState(false); + }; + speechConfig.setIsPlaying(true); + setLocalPlayingState(true); + }) + .catch(() => { + alert("Failed to play speech output."); + console.error("Failed to play speech output."); + speechConfig.setIsPlaying(false); + setLocalPlayingState(false); + }); + }; - const startOrStopAudio = async () => { - if (isPlaying) { - audio.pause(); - setIsPlaying(false); + const startOrStopSpeech = async (answer: string) => { + if (speechConfig.isPlaying) { + speechConfig.audio.pause(); + speechConfig.audio.currentTime = 0; + speechConfig.setIsPlaying(false); + setLocalPlayingState(false); return; } - - if (!url) { - console.error("Speech output is not yet available."); + if (speechConfig.speechUrls[index]) { + playAudio(speechConfig.speechUrls[index]); return; } - audio = new Audio(url); - await audio.play(); - audio.addEventListener("ended", () => { - setIsPlaying(false); + setIsLoading(true); + await getSpeechApi(answer).then(async speechUrl => { + if (!speechUrl) { + alert("Speech output is not available."); + console.error("Speech output is not available."); + return; + } + setIsLoading(false); + speechConfig.setSpeechUrls(speechConfig.speechUrls.map((url, i) => (i === index ? speechUrl : url))); + playAudio(speechUrl); }); - setIsPlaying(true); }; - const color = isPlaying ? "red" : "black"; - return ( - startOrStopAudio()} - disabled={!url} - /> + const color = localPlayingState ? "red" : "black"; + + // We always preload the Sync icon in hidden mode so that there's no visual glitch when icon changes + return isLoading ? ( + + ) : ( + <> + + startOrStopSpeech(answer)} + disabled={isStreaming} + /> + ); }; diff --git a/app/frontend/src/pages/ask/Ask.tsx b/app/frontend/src/pages/ask/Ask.tsx index 84d1ea87e3..10f6d7e2d5 100644 --- a/app/frontend/src/pages/ask/Ask.tsx +++ b/app/frontend/src/pages/ask/Ask.tsx @@ -4,7 +4,7 @@ import { useId } from "@fluentui/react-hooks"; import styles from "./Ask.module.css"; -import { askApi, configApi, getSpeechApi, ChatAppResponse, ChatAppRequest, RetrievalMode, VectorFieldOptions, GPT4VInput } from "../../api"; +import { askApi, configApi, ChatAppResponse, ChatAppRequest, RetrievalMode, VectorFieldOptions, GPT4VInput, SpeechConfig } from "../../api"; import { Answer, AnswerError } from "../../components/Answer"; import { QuestionInput } from "../../components/QuestionInput"; import { ExampleList } from "../../components/Example"; @@ -48,13 +48,24 @@ export function Component(): JSX.Element { const [showSpeechInput, setShowSpeechInput] = useState(false); const [showSpeechOutputBrowser, setShowSpeechOutputBrowser] = useState(false); const [showSpeechOutputAzure, setShowSpeechOutputAzure] = useState(false); + const audio = useRef(new Audio()).current; + const [isPlaying, setIsPlaying] = useState(false); const lastQuestionRef = useRef(""); const [isLoading, setIsLoading] = useState(false); const [error, setError] = useState(); const [answer, setAnswer] = useState(); - const [speechUrl, setSpeechUrl] = useState(null); + // For the Ask tab, this array will hold a maximum of one URL + const [speechUrls, setSpeechUrls] = useState<(string | null)[]>([]); + + const speechConfig: SpeechConfig = { + speechUrls, + setSpeechUrls, + audio, + isPlaying, + setIsPlaying + }; const [activeCitation, setActiveCitation] = useState(); const [activeAnalysisPanelTab, setActiveAnalysisPanelTab] = useState(undefined); @@ -82,14 +93,6 @@ export function Component(): JSX.Element { getConfig(); }, []); - useEffect(() => { - if (answer && showSpeechOutputAzure) { - getSpeechApi(answer.message.content).then(speechUrl => { - setSpeechUrl(speechUrl); - }); - } - }, [answer]); - const makeApiRequest = async (question: string) => { lastQuestionRef.current = question; @@ -134,7 +137,7 @@ export function Component(): JSX.Element { }; const result = await askApi(request, token); setAnswer(result); - setSpeechUrl(null); + setSpeechUrls([null]); } catch (e) { setError(e); } finally { @@ -256,13 +259,14 @@ export function Component(): JSX.Element {
onShowCitation(x)} onThoughtProcessClicked={() => onToggleTab(AnalysisPanelTabs.ThoughtProcessTab)} onSupportingContentClicked={() => onToggleTab(AnalysisPanelTabs.SupportingContentTab)} showSpeechOutputAzure={showSpeechOutputAzure} showSpeechOutputBrowser={showSpeechOutputBrowser} - speechUrl={speechUrl} />
)} diff --git a/app/frontend/src/pages/chat/Chat.tsx b/app/frontend/src/pages/chat/Chat.tsx index 99725263ab..248788c5b6 100644 --- a/app/frontend/src/pages/chat/Chat.tsx +++ b/app/frontend/src/pages/chat/Chat.tsx @@ -9,14 +9,14 @@ import styles from "./Chat.module.css"; import { chatApi, configApi, - getSpeechApi, RetrievalMode, ChatAppResponse, ChatAppResponseOrError, ChatAppRequest, ResponseMessage, VectorFieldOptions, - GPT4VInput + GPT4VInput, + SpeechConfig } from "../../api"; import { Answer, AnswerError, AnswerLoading } from "../../components/Answer"; import { QuestionInput } from "../../components/QuestionInput"; @@ -77,6 +77,16 @@ const Chat = () => { const [showSpeechInput, setShowSpeechInput] = useState(false); const [showSpeechOutputBrowser, setShowSpeechOutputBrowser] = useState(false); const [showSpeechOutputAzure, setShowSpeechOutputAzure] = useState(false); + const audio = useRef(new Audio()).current; + const [isPlaying, setIsPlaying] = useState(false); + + const speechConfig: SpeechConfig = { + speechUrls, + setSpeechUrls, + audio, + isPlaying, + setIsPlaying + }; const getConfig = async () => { configApi().then(config => { @@ -199,6 +209,7 @@ const Chat = () => { } setAnswers([...answers, [question, parsedResponse as ChatAppResponse]]); } + setSpeechUrls([...speechUrls, null]); } catch (e) { setError(e); } finally { @@ -212,6 +223,7 @@ const Chat = () => { setActiveCitation(undefined); setActiveAnalysisPanelTab(undefined); setAnswers([]); + setSpeechUrls([]); setStreamedAnswers([]); setIsLoading(false); setIsStreaming(false); @@ -223,19 +235,6 @@ const Chat = () => { getConfig(); }, []); - useEffect(() => { - if (answers && showSpeechOutputAzure) { - // For each answer that is missing a speech URL, fetch the speech URL - for (let i = 0; i < answers.length; i++) { - if (!speechUrls[i]) { - getSpeechApi(answers[i][1].message.content).then(speechUrl => { - setSpeechUrls([...speechUrls.slice(0, i), speechUrl, ...speechUrls.slice(i + 1)]); - }); - } - } - } - }, [answers]); - const onPromptTemplateChange = (_ev?: React.FormEvent, newValue?: string) => { setPromptTemplate(newValue || ""); }; @@ -368,6 +367,8 @@ const Chat = () => { isStreaming={true} key={index} answer={streamedAnswer[1]} + index={index} + speechConfig={speechConfig} isSelected={false} onCitationClicked={c => onShowCitation(c, index)} onThoughtProcessClicked={() => onToggleTab(AnalysisPanelTabs.ThoughtProcessTab, index)} @@ -376,7 +377,6 @@ const Chat = () => { showFollowupQuestions={useSuggestFollowupQuestions && answers.length - 1 === index} showSpeechOutputAzure={showSpeechOutputAzure} showSpeechOutputBrowser={showSpeechOutputBrowser} - speechUrl={speechUrls[index]} /> @@ -390,6 +390,8 @@ const Chat = () => { isStreaming={false} key={index} answer={answer[1]} + index={index} + speechConfig={speechConfig} isSelected={selectedAnswer === index && activeAnalysisPanelTab !== undefined} onCitationClicked={c => onShowCitation(c, index)} onThoughtProcessClicked={() => onToggleTab(AnalysisPanelTabs.ThoughtProcessTab, index)} @@ -398,7 +400,6 @@ const Chat = () => { showFollowupQuestions={useSuggestFollowupQuestions && answers.length - 1 === index} showSpeechOutputAzure={showSpeechOutputAzure} showSpeechOutputBrowser={showSpeechOutputBrowser} - speechUrl={speechUrls[index]} />