From 7cf281efcc90bc3acc67b64df037a71c0b3e753b Mon Sep 17 00:00:00 2001 From: John Aziz Date: Thu, 8 Aug 2024 01:42:14 +0300 Subject: [PATCH 1/7] on demand speech, fix https://github.com/Azure-Samples/azure-search-openai-demo/issues/1892 --- app/frontend/src/components/Answer/Answer.tsx | 6 ++-- .../components/Answer/SpeechOutputAzure.tsx | 32 ++++++++++--------- app/frontend/src/pages/ask/Ask.tsx | 11 ------- app/frontend/src/pages/chat/Chat.tsx | 16 ---------- 4 files changed, 19 insertions(+), 46 deletions(-) diff --git a/app/frontend/src/components/Answer/Answer.tsx b/app/frontend/src/components/Answer/Answer.tsx index c6d77748f2..c606aaf199 100644 --- a/app/frontend/src/components/Answer/Answer.tsx +++ b/app/frontend/src/components/Answer/Answer.tsx @@ -23,7 +23,6 @@ interface Props { showFollowupQuestions?: boolean; showSpeechOutputBrowser?: boolean; showSpeechOutputAzure?: boolean; - speechUrl: string | null; } export const Answer = ({ @@ -36,8 +35,7 @@ export const Answer = ({ onFollowupQuestionClicked, showFollowupQuestions, showSpeechOutputAzure, - showSpeechOutputBrowser, - speechUrl + showSpeechOutputBrowser }: Props) => { const followupQuestions = answer.context?.followup_questions; const messageContent = answer.message.content; @@ -67,7 +65,7 @@ export const Answer = ({ onClick={() => onSupportingContentClicked()} disabled={!answer.context.data_points} /> - {showSpeechOutputAzure && } + {showSpeechOutputAzure && } {showSpeechOutputBrowser && } diff --git a/app/frontend/src/components/Answer/SpeechOutputAzure.tsx b/app/frontend/src/components/Answer/SpeechOutputAzure.tsx index 214b5a0191..6d1452976e 100644 --- a/app/frontend/src/components/Answer/SpeechOutputAzure.tsx +++ b/app/frontend/src/components/Answer/SpeechOutputAzure.tsx @@ -1,33 +1,36 @@ import { useState } from "react"; import { IconButton } from "@fluentui/react"; +import { getSpeechApi } from "../../api"; interface Props { - url: string | null; + answer: string; } let audio = new Audio(); -export const SpeechOutputAzure = ({ url }: Props) => { +export const SpeechOutputAzure = ({ answer }: Props) => { const [isPlaying, setIsPlaying] = useState(false); - const startOrStopAudio = async () => { + const startOrStopSpeech = async (answer: string) => { if (isPlaying) { audio.pause(); setIsPlaying(false); return; } - - if (!url) { - console.error("Speech output is not yet available."); - return; - } - audio = new Audio(url); - await audio.play(); - audio.addEventListener("ended", () => { - setIsPlaying(false); + await getSpeechApi(answer).then(async speechUrl => { + if (!speechUrl) { + alert("Speech output is not available."); + console.error("Speech output is not available."); + return; + } + audio = new Audio(speechUrl); + await audio.play(); + audio.addEventListener("ended", () => { + setIsPlaying(false); + }); + setIsPlaying(true); }); - setIsPlaying(true); }; const color = isPlaying ? "red" : "black"; @@ -37,8 +40,7 @@ export const SpeechOutputAzure = ({ url }: Props) => { iconProps={{ iconName: "Volume3" }} title="Speak answer" ariaLabel="Speak answer" - onClick={() => startOrStopAudio()} - disabled={!url} + onClick={() => startOrStopSpeech(answer)} /> ); }; diff --git a/app/frontend/src/pages/ask/Ask.tsx b/app/frontend/src/pages/ask/Ask.tsx index 84d1ea87e3..817bd69668 100644 --- a/app/frontend/src/pages/ask/Ask.tsx +++ b/app/frontend/src/pages/ask/Ask.tsx @@ -54,7 +54,6 @@ export function Component(): JSX.Element { const [isLoading, setIsLoading] = useState(false); const [error, setError] = useState(); const [answer, setAnswer] = useState(); - const [speechUrl, setSpeechUrl] = useState(null); const [activeCitation, setActiveCitation] = useState(); const [activeAnalysisPanelTab, setActiveAnalysisPanelTab] = useState(undefined); @@ -82,14 +81,6 @@ export function Component(): JSX.Element { getConfig(); }, []); - useEffect(() => { - if (answer && showSpeechOutputAzure) { - getSpeechApi(answer.message.content).then(speechUrl => { - setSpeechUrl(speechUrl); - }); - } - }, [answer]); - const makeApiRequest = async (question: string) => { lastQuestionRef.current = question; @@ -134,7 +125,6 @@ export function Component(): JSX.Element { }; const result = await askApi(request, token); setAnswer(result); - setSpeechUrl(null); } catch (e) { setError(e); } finally { @@ -262,7 +252,6 @@ export function Component(): JSX.Element { onSupportingContentClicked={() => onToggleTab(AnalysisPanelTabs.SupportingContentTab)} showSpeechOutputAzure={showSpeechOutputAzure} showSpeechOutputBrowser={showSpeechOutputBrowser} - speechUrl={speechUrl} /> )} diff --git a/app/frontend/src/pages/chat/Chat.tsx b/app/frontend/src/pages/chat/Chat.tsx index 99725263ab..9e52378c2c 100644 --- a/app/frontend/src/pages/chat/Chat.tsx +++ b/app/frontend/src/pages/chat/Chat.tsx @@ -68,7 +68,6 @@ const Chat = () => { const [selectedAnswer, setSelectedAnswer] = useState(0); const [answers, setAnswers] = useState<[user: string, response: ChatAppResponse][]>([]); const [streamedAnswers, setStreamedAnswers] = useState<[user: string, response: ChatAppResponse][]>([]); - const [speechUrls, setSpeechUrls] = useState<(string | null)[]>([]); const [showGPT4VOptions, setShowGPT4VOptions] = useState(false); const [showSemanticRankerOption, setShowSemanticRankerOption] = useState(false); @@ -223,19 +222,6 @@ const Chat = () => { getConfig(); }, []); - useEffect(() => { - if (answers && showSpeechOutputAzure) { - // For each answer that is missing a speech URL, fetch the speech URL - for (let i = 0; i < answers.length; i++) { - if (!speechUrls[i]) { - getSpeechApi(answers[i][1].message.content).then(speechUrl => { - setSpeechUrls([...speechUrls.slice(0, i), speechUrl, ...speechUrls.slice(i + 1)]); - }); - } - } - } - }, [answers]); - const onPromptTemplateChange = (_ev?: React.FormEvent, newValue?: string) => { setPromptTemplate(newValue || ""); }; @@ -376,7 +362,6 @@ const Chat = () => { showFollowupQuestions={useSuggestFollowupQuestions && answers.length - 1 === index} showSpeechOutputAzure={showSpeechOutputAzure} showSpeechOutputBrowser={showSpeechOutputBrowser} - speechUrl={speechUrls[index]} /> @@ -398,7 +383,6 @@ const Chat = () => { showFollowupQuestions={useSuggestFollowupQuestions && answers.length - 1 === index} showSpeechOutputAzure={showSpeechOutputAzure} showSpeechOutputBrowser={showSpeechOutputBrowser} - speechUrl={speechUrls[index]} /> From 4a285ee4a676a22d7392536d3b311f65b6f67223 Mon Sep 17 00:00:00 2001 From: John Aziz Date: Fri, 9 Aug 2024 00:11:28 +0300 Subject: [PATCH 2/7] cache speech urls --- app/frontend/src/components/Answer/Answer.tsx | 11 +++++-- .../components/Answer/SpeechOutputAzure.tsx | 33 ++++++++++++++----- app/frontend/src/pages/ask/Ask.tsx | 5 +++ app/frontend/src/pages/chat/Chat.tsx | 10 +++++- 4 files changed, 48 insertions(+), 11 deletions(-) diff --git a/app/frontend/src/components/Answer/Answer.tsx b/app/frontend/src/components/Answer/Answer.tsx index c606aaf199..7297a63401 100644 --- a/app/frontend/src/components/Answer/Answer.tsx +++ b/app/frontend/src/components/Answer/Answer.tsx @@ -14,6 +14,9 @@ import { SpeechOutputAzure } from "./SpeechOutputAzure"; interface Props { answer: ChatAppResponse; + index: number; + speechUrls: (string | null)[]; + updateSpeechUrls: (urls: (string | null)[]) => void; isSelected?: boolean; isStreaming: boolean; onCitationClicked: (filePath: string) => void; @@ -27,6 +30,9 @@ interface Props { export const Answer = ({ answer, + index, + speechUrls, + updateSpeechUrls, isSelected, isStreaming, onCitationClicked, @@ -40,7 +46,6 @@ export const Answer = ({ const followupQuestions = answer.context?.followup_questions; const messageContent = answer.message.content; const parsedAnswer = useMemo(() => parseAnswerToHtml(messageContent, isStreaming, onCitationClicked), [answer]); - const sanitizedAnswerHtml = DOMPurify.sanitize(parsedAnswer.answerHtml); return ( @@ -65,7 +70,9 @@ export const Answer = ({ onClick={() => onSupportingContentClicked()} disabled={!answer.context.data_points} /> - {showSpeechOutputAzure && } + {showSpeechOutputAzure && ( + + )} {showSpeechOutputBrowser && } diff --git a/app/frontend/src/components/Answer/SpeechOutputAzure.tsx b/app/frontend/src/components/Answer/SpeechOutputAzure.tsx index 6d1452976e..9e852982b3 100644 --- a/app/frontend/src/components/Answer/SpeechOutputAzure.tsx +++ b/app/frontend/src/components/Answer/SpeechOutputAzure.tsx @@ -5,12 +5,25 @@ import { getSpeechApi } from "../../api"; interface Props { answer: string; + urls: (string | null)[]; + updateSpeechUrls: (urls: (string | null)[]) => void; + index: number; } let audio = new Audio(); -export const SpeechOutputAzure = ({ answer }: Props) => { +export const SpeechOutputAzure = ({ answer, urls, updateSpeechUrls, index }: Props) => { const [isPlaying, setIsPlaying] = useState(false); + const [isLoading, setIsLoading] = useState(false); + + const playAudio = async (url: string) => { + audio = new Audio(url); + await audio.play(); + audio.addEventListener("ended", () => { + setIsPlaying(false); + }); + setIsPlaying(true); + }; const startOrStopSpeech = async (answer: string) => { if (isPlaying) { @@ -18,23 +31,27 @@ export const SpeechOutputAzure = ({ answer }: Props) => { setIsPlaying(false); return; } + if (urls[index]) { + playAudio(urls[index]); + return; + } + setIsLoading(true); await getSpeechApi(answer).then(async speechUrl => { if (!speechUrl) { alert("Speech output is not available."); console.error("Speech output is not available."); return; } - audio = new Audio(speechUrl); - await audio.play(); - audio.addEventListener("ended", () => { - setIsPlaying(false); - }); - setIsPlaying(true); + setIsLoading(false); + updateSpeechUrls(urls.map((url, i) => (i === index ? speechUrl : url))); + playAudio(speechUrl); }); }; const color = isPlaying ? "red" : "black"; - return ( + return isLoading ? ( + + ) : ( (false); const [error, setError] = useState(); const [answer, setAnswer] = useState(); + const [speechUrl, setSpeechUrl] = useState<(string | null)[]>([]); const [activeCitation, setActiveCitation] = useState(); const [activeAnalysisPanelTab, setActiveAnalysisPanelTab] = useState(undefined); @@ -125,6 +126,7 @@ export function Component(): JSX.Element { }; const result = await askApi(request, token); setAnswer(result); + setSpeechUrl([null]); } catch (e) { setError(e); } finally { @@ -246,6 +248,9 @@ export function Component(): JSX.Element {
onShowCitation(x)} onThoughtProcessClicked={() => onToggleTab(AnalysisPanelTabs.ThoughtProcessTab)} diff --git a/app/frontend/src/pages/chat/Chat.tsx b/app/frontend/src/pages/chat/Chat.tsx index 9e52378c2c..716ccfe015 100644 --- a/app/frontend/src/pages/chat/Chat.tsx +++ b/app/frontend/src/pages/chat/Chat.tsx @@ -9,7 +9,6 @@ import styles from "./Chat.module.css"; import { chatApi, configApi, - getSpeechApi, RetrievalMode, ChatAppResponse, ChatAppResponseOrError, @@ -67,6 +66,7 @@ const Chat = () => { const [selectedAnswer, setSelectedAnswer] = useState(0); const [answers, setAnswers] = useState<[user: string, response: ChatAppResponse][]>([]); + const [speechUrls, setSpeechUrls] = useState<(string | null)[]>([]); const [streamedAnswers, setStreamedAnswers] = useState<[user: string, response: ChatAppResponse][]>([]); const [showGPT4VOptions, setShowGPT4VOptions] = useState(false); @@ -198,6 +198,7 @@ const Chat = () => { } setAnswers([...answers, [question, parsedResponse as ChatAppResponse]]); } + setSpeechUrls([...speechUrls, null]); } catch (e) { setError(e); } finally { @@ -211,6 +212,7 @@ const Chat = () => { setActiveCitation(undefined); setActiveAnalysisPanelTab(undefined); setAnswers([]); + setSpeechUrls([]); setStreamedAnswers([]); setIsLoading(false); setIsStreaming(false); @@ -354,6 +356,9 @@ const Chat = () => { isStreaming={true} key={index} answer={streamedAnswer[1]} + index={index} + speechUrls={speechUrls} + updateSpeechUrls={setSpeechUrls} isSelected={false} onCitationClicked={c => onShowCitation(c, index)} onThoughtProcessClicked={() => onToggleTab(AnalysisPanelTabs.ThoughtProcessTab, index)} @@ -375,6 +380,9 @@ const Chat = () => { isStreaming={false} key={index} answer={answer[1]} + index={index} + speechUrls={speechUrls} + updateSpeechUrls={setSpeechUrls} isSelected={selectedAnswer === index && activeAnalysisPanelTab !== undefined} onCitationClicked={c => onShowCitation(c, index)} onThoughtProcessClicked={() => onToggleTab(AnalysisPanelTabs.ThoughtProcessTab, index)} From 9c551f3c986862bbcae7d7a23bf496963785758c Mon Sep 17 00:00:00 2001 From: John Aziz Date: Sun, 11 Aug 2024 17:15:48 +0300 Subject: [PATCH 3/7] maintain one audio source across app prevent speech generation while streaming the response --- app/frontend/src/components/Answer/Answer.tsx | 17 ++++++++- .../components/Answer/SpeechOutputAzure.tsx | 35 +++++++++++++------ app/frontend/src/pages/ask/Ask.tsx | 5 +++ app/frontend/src/pages/chat/Chat.tsx | 8 +++++ 4 files changed, 53 insertions(+), 12 deletions(-) diff --git a/app/frontend/src/components/Answer/Answer.tsx b/app/frontend/src/components/Answer/Answer.tsx index 7297a63401..f404f4b1b6 100644 --- a/app/frontend/src/components/Answer/Answer.tsx +++ b/app/frontend/src/components/Answer/Answer.tsx @@ -17,6 +17,9 @@ interface Props { index: number; speechUrls: (string | null)[]; updateSpeechUrls: (urls: (string | null)[]) => void; + audio: HTMLAudioElement; + isPlaying: boolean; + setIsPlaying: (isPlaying: boolean) => void; isSelected?: boolean; isStreaming: boolean; onCitationClicked: (filePath: string) => void; @@ -33,6 +36,9 @@ export const Answer = ({ index, speechUrls, updateSpeechUrls, + audio, + isPlaying, + setIsPlaying, isSelected, isStreaming, onCitationClicked, @@ -71,7 +77,16 @@ export const Answer = ({ disabled={!answer.context.data_points} /> {showSpeechOutputAzure && ( - + )} {showSpeechOutputBrowser && }
diff --git a/app/frontend/src/components/Answer/SpeechOutputAzure.tsx b/app/frontend/src/components/Answer/SpeechOutputAzure.tsx index 9e852982b3..7be823faf8 100644 --- a/app/frontend/src/components/Answer/SpeechOutputAzure.tsx +++ b/app/frontend/src/components/Answer/SpeechOutputAzure.tsx @@ -8,27 +8,39 @@ interface Props { urls: (string | null)[]; updateSpeechUrls: (urls: (string | null)[]) => void; index: number; + audio: HTMLAudioElement; + isPlaying: boolean; + setIsPlaying: (isPlaying: boolean) => void; + isStreaming: boolean; } -let audio = new Audio(); - -export const SpeechOutputAzure = ({ answer, urls, updateSpeechUrls, index }: Props) => { - const [isPlaying, setIsPlaying] = useState(false); +export const SpeechOutputAzure = ({ answer, urls, updateSpeechUrls, index, audio, isPlaying, setIsPlaying, isStreaming }: Props) => { const [isLoading, setIsLoading] = useState(false); + const [localPlayingState, setLocalPlayingState] = useState(false); const playAudio = async (url: string) => { - audio = new Audio(url); - await audio.play(); - audio.addEventListener("ended", () => { - setIsPlaying(false); - }); - setIsPlaying(true); + audio.src = url; + await audio + .play() + .then(() => { + audio.onended = () => setIsPlaying(false); + setIsPlaying(true); + setLocalPlayingState(true); + }) + .catch(() => { + alert("Failed to play speech output."); + console.error("Failed to play speech output."); + setIsPlaying(false); + setLocalPlayingState(false); + }); }; const startOrStopSpeech = async (answer: string) => { if (isPlaying) { audio.pause(); + audio.currentTime = 0; setIsPlaying(false); + setLocalPlayingState(false); return; } if (urls[index]) { @@ -48,7 +60,7 @@ export const SpeechOutputAzure = ({ answer, urls, updateSpeechUrls, index }: Pro }); }; - const color = isPlaying ? "red" : "black"; + const color = localPlayingState ? "red" : "black"; return isLoading ? ( ) : ( @@ -58,6 +70,7 @@ export const SpeechOutputAzure = ({ answer, urls, updateSpeechUrls, index }: Pro title="Speak answer" ariaLabel="Speak answer" onClick={() => startOrStopSpeech(answer)} + disabled={isStreaming} /> ); }; diff --git a/app/frontend/src/pages/ask/Ask.tsx b/app/frontend/src/pages/ask/Ask.tsx index b7b8e27ed6..e3eebb4549 100644 --- a/app/frontend/src/pages/ask/Ask.tsx +++ b/app/frontend/src/pages/ask/Ask.tsx @@ -48,6 +48,8 @@ export function Component(): JSX.Element { const [showSpeechInput, setShowSpeechInput] = useState(false); const [showSpeechOutputBrowser, setShowSpeechOutputBrowser] = useState(false); const [showSpeechOutputAzure, setShowSpeechOutputAzure] = useState(false); + const audio = useRef(new Audio()).current; + const [isPlaying, setIsPlaying] = useState(false); const lastQuestionRef = useRef(""); @@ -251,6 +253,9 @@ export function Component(): JSX.Element { index={0} speechUrls={speechUrl} updateSpeechUrls={setSpeechUrl} + audio={audio} + isPlaying={isPlaying} + setIsPlaying={setIsPlaying} isStreaming={false} onCitationClicked={x => onShowCitation(x)} onThoughtProcessClicked={() => onToggleTab(AnalysisPanelTabs.ThoughtProcessTab)} diff --git a/app/frontend/src/pages/chat/Chat.tsx b/app/frontend/src/pages/chat/Chat.tsx index 716ccfe015..d0406b7b8c 100644 --- a/app/frontend/src/pages/chat/Chat.tsx +++ b/app/frontend/src/pages/chat/Chat.tsx @@ -76,6 +76,8 @@ const Chat = () => { const [showSpeechInput, setShowSpeechInput] = useState(false); const [showSpeechOutputBrowser, setShowSpeechOutputBrowser] = useState(false); const [showSpeechOutputAzure, setShowSpeechOutputAzure] = useState(false); + const audio = useRef(new Audio()).current; + const [isPlaying, setIsPlaying] = useState(false); const getConfig = async () => { configApi().then(config => { @@ -359,6 +361,9 @@ const Chat = () => { index={index} speechUrls={speechUrls} updateSpeechUrls={setSpeechUrls} + audio={audio} + isPlaying={isPlaying} + setIsPlaying={setIsPlaying} isSelected={false} onCitationClicked={c => onShowCitation(c, index)} onThoughtProcessClicked={() => onToggleTab(AnalysisPanelTabs.ThoughtProcessTab, index)} @@ -382,7 +387,10 @@ const Chat = () => { answer={answer[1]} index={index} speechUrls={speechUrls} + audio={audio} updateSpeechUrls={setSpeechUrls} + isPlaying={isPlaying} + setIsPlaying={setIsPlaying} isSelected={selectedAnswer === index && activeAnalysisPanelTab !== undefined} onCitationClicked={c => onShowCitation(c, index)} onThoughtProcessClicked={() => onToggleTab(AnalysisPanelTabs.ThoughtProcessTab, index)} From 0b75a3942d94e17f04fe4e1827ba3a9a8377b88b Mon Sep 17 00:00:00 2001 From: John Aziz Date: Fri, 23 Aug 2024 20:01:03 +0300 Subject: [PATCH 4/7] create speechConfig type to group speech config --- app/frontend/src/api/models.ts | 8 ++++ app/frontend/src/components/Answer/Answer.tsx | 25 ++----------- .../components/Answer/SpeechOutputAzure.tsx | 37 +++++++++---------- app/frontend/src/pages/ask/Ask.tsx | 20 ++++++---- app/frontend/src/pages/chat/Chat.tsx | 23 ++++++------ 5 files changed, 54 insertions(+), 59 deletions(-) diff --git a/app/frontend/src/api/models.ts b/app/frontend/src/api/models.ts index 3ac5590bf5..c8d264e362 100644 --- a/app/frontend/src/api/models.ts +++ b/app/frontend/src/api/models.ts @@ -92,3 +92,11 @@ export type Config = { export type SimpleAPIResponse = { message?: string; }; + +export interface SpeechConfig { + speechUrls: (string | null)[]; + setSpeechUrls: (urls: (string | null)[]) => void; + audio: HTMLAudioElement; + isPlaying: boolean; + setIsPlaying: (isPlaying: boolean) => void; +} diff --git a/app/frontend/src/components/Answer/Answer.tsx b/app/frontend/src/components/Answer/Answer.tsx index f404f4b1b6..22f182f64d 100644 --- a/app/frontend/src/components/Answer/Answer.tsx +++ b/app/frontend/src/components/Answer/Answer.tsx @@ -6,7 +6,7 @@ import remarkGfm from "remark-gfm"; import rehypeRaw from "rehype-raw"; import styles from "./Answer.module.css"; -import { ChatAppResponse, getCitationFilePath } from "../../api"; +import { ChatAppResponse, getCitationFilePath, SpeechConfig } from "../../api"; import { parseAnswerToHtml } from "./AnswerParser"; import { AnswerIcon } from "./AnswerIcon"; import { SpeechOutputBrowser } from "./SpeechOutputBrowser"; @@ -15,11 +15,7 @@ import { SpeechOutputAzure } from "./SpeechOutputAzure"; interface Props { answer: ChatAppResponse; index: number; - speechUrls: (string | null)[]; - updateSpeechUrls: (urls: (string | null)[]) => void; - audio: HTMLAudioElement; - isPlaying: boolean; - setIsPlaying: (isPlaying: boolean) => void; + speechConfig: SpeechConfig; isSelected?: boolean; isStreaming: boolean; onCitationClicked: (filePath: string) => void; @@ -34,11 +30,7 @@ interface Props { export const Answer = ({ answer, index, - speechUrls, - updateSpeechUrls, - audio, - isPlaying, - setIsPlaying, + speechConfig, isSelected, isStreaming, onCitationClicked, @@ -77,16 +69,7 @@ export const Answer = ({ disabled={!answer.context.data_points} /> {showSpeechOutputAzure && ( - + )} {showSpeechOutputBrowser && } diff --git a/app/frontend/src/components/Answer/SpeechOutputAzure.tsx b/app/frontend/src/components/Answer/SpeechOutputAzure.tsx index 7be823faf8..bbc7a70463 100644 --- a/app/frontend/src/components/Answer/SpeechOutputAzure.tsx +++ b/app/frontend/src/components/Answer/SpeechOutputAzure.tsx @@ -1,50 +1,49 @@ import { useState } from "react"; import { IconButton } from "@fluentui/react"; -import { getSpeechApi } from "../../api"; +import { getSpeechApi, SpeechConfig } from "../../api"; interface Props { answer: string; - urls: (string | null)[]; - updateSpeechUrls: (urls: (string | null)[]) => void; + speechConfig: SpeechConfig; index: number; - audio: HTMLAudioElement; - isPlaying: boolean; - setIsPlaying: (isPlaying: boolean) => void; isStreaming: boolean; } -export const SpeechOutputAzure = ({ answer, urls, updateSpeechUrls, index, audio, isPlaying, setIsPlaying, isStreaming }: Props) => { +export const SpeechOutputAzure = ({ answer, speechConfig, index, isStreaming }: Props) => { const [isLoading, setIsLoading] = useState(false); const [localPlayingState, setLocalPlayingState] = useState(false); const playAudio = async (url: string) => { - audio.src = url; - await audio + speechConfig.audio.src = url; + await speechConfig.audio .play() .then(() => { - audio.onended = () => setIsPlaying(false); - setIsPlaying(true); + speechConfig.audio.onended = () => { + speechConfig.setIsPlaying(false); + setLocalPlayingState(false); + }; + speechConfig.setIsPlaying(true); setLocalPlayingState(true); }) .catch(() => { alert("Failed to play speech output."); console.error("Failed to play speech output."); - setIsPlaying(false); + speechConfig.setIsPlaying(false); setLocalPlayingState(false); }); }; const startOrStopSpeech = async (answer: string) => { - if (isPlaying) { - audio.pause(); - audio.currentTime = 0; - setIsPlaying(false); + if (speechConfig.isPlaying) { + speechConfig.audio.pause(); + speechConfig.audio.currentTime = 0; + speechConfig.setIsPlaying(false); setLocalPlayingState(false); return; } - if (urls[index]) { - playAudio(urls[index]); + if (speechConfig.speechUrls[index]) { + playAudio(speechConfig.speechUrls[index]); return; } setIsLoading(true); @@ -55,7 +54,7 @@ export const SpeechOutputAzure = ({ answer, urls, updateSpeechUrls, index, audio return; } setIsLoading(false); - updateSpeechUrls(urls.map((url, i) => (i === index ? speechUrl : url))); + speechConfig.setSpeechUrls(speechConfig.speechUrls.map((url, i) => (i === index ? speechUrl : url))); playAudio(speechUrl); }); }; diff --git a/app/frontend/src/pages/ask/Ask.tsx b/app/frontend/src/pages/ask/Ask.tsx index e3eebb4549..3b6c5ea856 100644 --- a/app/frontend/src/pages/ask/Ask.tsx +++ b/app/frontend/src/pages/ask/Ask.tsx @@ -4,7 +4,7 @@ import { useId } from "@fluentui/react-hooks"; import styles from "./Ask.module.css"; -import { askApi, configApi, getSpeechApi, ChatAppResponse, ChatAppRequest, RetrievalMode, VectorFieldOptions, GPT4VInput } from "../../api"; +import { askApi, configApi, ChatAppResponse, ChatAppRequest, RetrievalMode, VectorFieldOptions, GPT4VInput, SpeechConfig } from "../../api"; import { Answer, AnswerError } from "../../components/Answer"; import { QuestionInput } from "../../components/QuestionInput"; import { ExampleList } from "../../components/Example"; @@ -56,7 +56,15 @@ export function Component(): JSX.Element { const [isLoading, setIsLoading] = useState(false); const [error, setError] = useState(); const [answer, setAnswer] = useState(); - const [speechUrl, setSpeechUrl] = useState<(string | null)[]>([]); + const [speechUrls, setSpeechUrls] = useState<(string | null)[]>([]); + + const speechConfig: SpeechConfig = { + speechUrls, + setSpeechUrls, + audio, + isPlaying, + setIsPlaying + }; const [activeCitation, setActiveCitation] = useState(); const [activeAnalysisPanelTab, setActiveAnalysisPanelTab] = useState(undefined); @@ -128,7 +136,7 @@ export function Component(): JSX.Element { }; const result = await askApi(request, token); setAnswer(result); - setSpeechUrl([null]); + setSpeechUrls([null]); } catch (e) { setError(e); } finally { @@ -251,11 +259,7 @@ export function Component(): JSX.Element { onShowCitation(x)} onThoughtProcessClicked={() => onToggleTab(AnalysisPanelTabs.ThoughtProcessTab)} diff --git a/app/frontend/src/pages/chat/Chat.tsx b/app/frontend/src/pages/chat/Chat.tsx index d0406b7b8c..b64c988d2a 100644 --- a/app/frontend/src/pages/chat/Chat.tsx +++ b/app/frontend/src/pages/chat/Chat.tsx @@ -15,7 +15,8 @@ import { ChatAppRequest, ResponseMessage, VectorFieldOptions, - GPT4VInput + GPT4VInput, + SpeechConfig } from "../../api"; import { Answer, AnswerError, AnswerLoading } from "../../components/Answer"; import { QuestionInput } from "../../components/QuestionInput"; @@ -79,6 +80,14 @@ const Chat = () => { const audio = useRef(new Audio()).current; const [isPlaying, setIsPlaying] = useState(false); + const speechConfig: SpeechConfig = { + speechUrls, + setSpeechUrls, + audio, + isPlaying, + setIsPlaying + }; + const getConfig = async () => { configApi().then(config => { setShowGPT4VOptions(config.showGPT4VOptions); @@ -359,11 +368,7 @@ const Chat = () => { key={index} answer={streamedAnswer[1]} index={index} - speechUrls={speechUrls} - updateSpeechUrls={setSpeechUrls} - audio={audio} - isPlaying={isPlaying} - setIsPlaying={setIsPlaying} + speechConfig={speechConfig} isSelected={false} onCitationClicked={c => onShowCitation(c, index)} onThoughtProcessClicked={() => onToggleTab(AnalysisPanelTabs.ThoughtProcessTab, index)} @@ -386,11 +391,7 @@ const Chat = () => { key={index} answer={answer[1]} index={index} - speechUrls={speechUrls} - audio={audio} - updateSpeechUrls={setSpeechUrls} - isPlaying={isPlaying} - setIsPlaying={setIsPlaying} + speechConfig={speechConfig} isSelected={selectedAnswer === index && activeAnalysisPanelTab !== undefined} onCitationClicked={c => onShowCitation(c, index)} onThoughtProcessClicked={() => onToggleTab(AnalysisPanelTabs.ThoughtProcessTab, index)} From 92462a911d8f01b6a27aa242b4aa348837e05034 Mon Sep 17 00:00:00 2001 From: John Aziz Date: Fri, 23 Aug 2024 20:09:40 +0300 Subject: [PATCH 5/7] Update app/frontend/src/pages/ask/Ask.tsx Co-authored-by: Pamela Fox --- app/frontend/src/pages/ask/Ask.tsx | 1 + 1 file changed, 1 insertion(+) diff --git a/app/frontend/src/pages/ask/Ask.tsx b/app/frontend/src/pages/ask/Ask.tsx index 3b6c5ea856..10f6d7e2d5 100644 --- a/app/frontend/src/pages/ask/Ask.tsx +++ b/app/frontend/src/pages/ask/Ask.tsx @@ -56,6 +56,7 @@ export function Component(): JSX.Element { const [isLoading, setIsLoading] = useState(false); const [error, setError] = useState(); const [answer, setAnswer] = useState(); + // For the Ask tab, this array will hold a maximum of one URL const [speechUrls, setSpeechUrls] = useState<(string | null)[]>([]); const speechConfig: SpeechConfig = { From 2d959bd346604b89024f8c0eacc1326149ebb5b9 Mon Sep 17 00:00:00 2001 From: John Aziz Date: Fri, 23 Aug 2024 20:11:09 +0300 Subject: [PATCH 6/7] Update app/frontend/src/pages/chat/Chat.tsx --- app/frontend/src/pages/chat/Chat.tsx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/frontend/src/pages/chat/Chat.tsx b/app/frontend/src/pages/chat/Chat.tsx index b64c988d2a..248788c5b6 100644 --- a/app/frontend/src/pages/chat/Chat.tsx +++ b/app/frontend/src/pages/chat/Chat.tsx @@ -67,8 +67,8 @@ const Chat = () => { const [selectedAnswer, setSelectedAnswer] = useState(0); const [answers, setAnswers] = useState<[user: string, response: ChatAppResponse][]>([]); - const [speechUrls, setSpeechUrls] = useState<(string | null)[]>([]); const [streamedAnswers, setStreamedAnswers] = useState<[user: string, response: ChatAppResponse][]>([]); + const [speechUrls, setSpeechUrls] = useState<(string | null)[]>([]); const [showGPT4VOptions, setShowGPT4VOptions] = useState(false); const [showSemanticRankerOption, setShowSemanticRankerOption] = useState(false); From 4fcd5b541c197fdd6e7612a28d36a1de9f6d7754 Mon Sep 17 00:00:00 2001 From: Pamela Fox Date: Fri, 23 Aug 2024 11:19:01 -0700 Subject: [PATCH 7/7] Preload the sync icon --- .../components/Answer/SpeechOutputAzure.tsx | 23 +++++++++++-------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/app/frontend/src/components/Answer/SpeechOutputAzure.tsx b/app/frontend/src/components/Answer/SpeechOutputAzure.tsx index bbc7a70463..61087c3659 100644 --- a/app/frontend/src/components/Answer/SpeechOutputAzure.tsx +++ b/app/frontend/src/components/Answer/SpeechOutputAzure.tsx @@ -60,16 +60,21 @@ export const SpeechOutputAzure = ({ answer, speechConfig, index, isStreaming }: }; const color = localPlayingState ? "red" : "black"; + + // We always preload the Sync icon in hidden mode so that there's no visual glitch when icon changes return isLoading ? ( - + ) : ( - startOrStopSpeech(answer)} - disabled={isStreaming} - /> + <> + + startOrStopSpeech(answer)} + disabled={isStreaming} + /> + ); };