diff --git a/app/frontend/src/api/models.ts b/app/frontend/src/api/models.ts
index 3ac5590bf5..c8d264e362 100644
--- a/app/frontend/src/api/models.ts
+++ b/app/frontend/src/api/models.ts
@@ -92,3 +92,11 @@ export type Config = {
export type SimpleAPIResponse = {
message?: string;
};
+
+export interface SpeechConfig {
+ speechUrls: (string | null)[];
+ setSpeechUrls: (urls: (string | null)[]) => void;
+ audio: HTMLAudioElement;
+ isPlaying: boolean;
+ setIsPlaying: (isPlaying: boolean) => void;
+}
diff --git a/app/frontend/src/components/Answer/Answer.tsx b/app/frontend/src/components/Answer/Answer.tsx
index c6d77748f2..22f182f64d 100644
--- a/app/frontend/src/components/Answer/Answer.tsx
+++ b/app/frontend/src/components/Answer/Answer.tsx
@@ -6,7 +6,7 @@ import remarkGfm from "remark-gfm";
import rehypeRaw from "rehype-raw";
import styles from "./Answer.module.css";
-import { ChatAppResponse, getCitationFilePath } from "../../api";
+import { ChatAppResponse, getCitationFilePath, SpeechConfig } from "../../api";
import { parseAnswerToHtml } from "./AnswerParser";
import { AnswerIcon } from "./AnswerIcon";
import { SpeechOutputBrowser } from "./SpeechOutputBrowser";
@@ -14,6 +14,8 @@ import { SpeechOutputAzure } from "./SpeechOutputAzure";
interface Props {
answer: ChatAppResponse;
+ index: number;
+ speechConfig: SpeechConfig;
isSelected?: boolean;
isStreaming: boolean;
onCitationClicked: (filePath: string) => void;
@@ -23,11 +25,12 @@ interface Props {
showFollowupQuestions?: boolean;
showSpeechOutputBrowser?: boolean;
showSpeechOutputAzure?: boolean;
- speechUrl: string | null;
}
export const Answer = ({
answer,
+ index,
+ speechConfig,
isSelected,
isStreaming,
onCitationClicked,
@@ -36,13 +39,11 @@ export const Answer = ({
onFollowupQuestionClicked,
showFollowupQuestions,
showSpeechOutputAzure,
- showSpeechOutputBrowser,
- speechUrl
+ showSpeechOutputBrowser
}: Props) => {
const followupQuestions = answer.context?.followup_questions;
const messageContent = answer.message.content;
const parsedAnswer = useMemo(() => parseAnswerToHtml(messageContent, isStreaming, onCitationClicked), [answer]);
-
const sanitizedAnswerHtml = DOMPurify.sanitize(parsedAnswer.answerHtml);
return (
@@ -67,7 +68,9 @@ export const Answer = ({
onClick={() => onSupportingContentClicked()}
disabled={!answer.context.data_points}
/>
-                        {showSpeechOutputAzure && <SpeechOutputAzure url={speechUrl} />}
+                        {showSpeechOutputAzure && (
+                            <SpeechOutputAzure answer={sanitizedAnswerHtml} speechConfig={speechConfig} index={index} isStreaming={isStreaming} />
+                        )}
                         {showSpeechOutputBrowser && <SpeechOutputBrowser answer={sanitizedAnswerHtml} />}
diff --git a/app/frontend/src/components/Answer/SpeechOutputAzure.tsx b/app/frontend/src/components/Answer/SpeechOutputAzure.tsx
index 214b5a0191..61087c3659 100644
--- a/app/frontend/src/components/Answer/SpeechOutputAzure.tsx
+++ b/app/frontend/src/components/Answer/SpeechOutputAzure.tsx
@@ -1,44 +1,80 @@
import { useState } from "react";
import { IconButton } from "@fluentui/react";
+import { getSpeechApi, SpeechConfig } from "../../api";
interface Props {
- url: string | null;
+ answer: string;
+ speechConfig: SpeechConfig;
+ index: number;
+ isStreaming: boolean;
}
-let audio = new Audio();
+export const SpeechOutputAzure = ({ answer, speechConfig, index, isStreaming }: Props) => {
+ const [isLoading, setIsLoading] = useState(false);
+ const [localPlayingState, setLocalPlayingState] = useState(false);
-export const SpeechOutputAzure = ({ url }: Props) => {
- const [isPlaying, setIsPlaying] = useState(false);
+ const playAudio = async (url: string) => {
+ speechConfig.audio.src = url;
+ await speechConfig.audio
+ .play()
+ .then(() => {
+ speechConfig.audio.onended = () => {
+ speechConfig.setIsPlaying(false);
+ setLocalPlayingState(false);
+ };
+ speechConfig.setIsPlaying(true);
+ setLocalPlayingState(true);
+ })
+ .catch(() => {
+ alert("Failed to play speech output.");
+ console.error("Failed to play speech output.");
+ speechConfig.setIsPlaying(false);
+ setLocalPlayingState(false);
+ });
+ };
- const startOrStopAudio = async () => {
- if (isPlaying) {
- audio.pause();
- setIsPlaying(false);
+ const startOrStopSpeech = async (answer: string) => {
+ if (speechConfig.isPlaying) {
+ speechConfig.audio.pause();
+ speechConfig.audio.currentTime = 0;
+ speechConfig.setIsPlaying(false);
+ setLocalPlayingState(false);
return;
}
-
- if (!url) {
- console.error("Speech output is not yet available.");
+ if (speechConfig.speechUrls[index]) {
+ playAudio(speechConfig.speechUrls[index]);
return;
}
- audio = new Audio(url);
- await audio.play();
- audio.addEventListener("ended", () => {
- setIsPlaying(false);
+ setIsLoading(true);
+ await getSpeechApi(answer).then(async speechUrl => {
+ if (!speechUrl) {
+ alert("Speech output is not available.");
+ console.error("Speech output is not available.");
+ return;
+ }
+ setIsLoading(false);
+ speechConfig.setSpeechUrls(speechConfig.speechUrls.map((url, i) => (i === index ? speechUrl : url)));
+ playAudio(speechUrl);
});
- setIsPlaying(true);
};
- const color = isPlaying ? "red" : "black";
-    return (
-        <IconButton
-            style={{ color: color }}
-            iconProps={{ iconName: "Volume3" }}
-            title="Speak answer"
-            ariaLabel="Speak answer"
-            onClick={() => startOrStopAudio()}
-            disabled={!url}
-        />
+ const color = localPlayingState ? "red" : "black";
+
+ // We always preload the Sync icon in hidden mode so that there's no visual glitch when icon changes
+    return isLoading ? (
+        <IconButton style={{ color: color }} iconProps={{ iconName: "Sync" }} title="Loading speech" ariaLabel="Loading speech" disabled={true} />
+    ) : (
+        <>
+            <IconButton hidden={true} iconProps={{ iconName: "Sync" }} />
+            <IconButton
+                style={{ color: color }}
+                iconProps={{ iconName: "Volume3" }}
+                title="Speak answer"
+                ariaLabel="Speak answer"
+                onClick={() => startOrStopSpeech(answer)}
+                disabled={isStreaming}
+            />
+        </>
);
};
diff --git a/app/frontend/src/pages/ask/Ask.tsx b/app/frontend/src/pages/ask/Ask.tsx
index 84d1ea87e3..10f6d7e2d5 100644
--- a/app/frontend/src/pages/ask/Ask.tsx
+++ b/app/frontend/src/pages/ask/Ask.tsx
@@ -4,7 +4,7 @@ import { useId } from "@fluentui/react-hooks";
import styles from "./Ask.module.css";
-import { askApi, configApi, getSpeechApi, ChatAppResponse, ChatAppRequest, RetrievalMode, VectorFieldOptions, GPT4VInput } from "../../api";
+import { askApi, configApi, ChatAppResponse, ChatAppRequest, RetrievalMode, VectorFieldOptions, GPT4VInput, SpeechConfig } from "../../api";
import { Answer, AnswerError } from "../../components/Answer";
import { QuestionInput } from "../../components/QuestionInput";
import { ExampleList } from "../../components/Example";
@@ -48,13 +48,24 @@ export function Component(): JSX.Element {
const [showSpeechInput, setShowSpeechInput] = useState(false);
const [showSpeechOutputBrowser, setShowSpeechOutputBrowser] = useState(false);
const [showSpeechOutputAzure, setShowSpeechOutputAzure] = useState(false);
+ const audio = useRef(new Audio()).current;
+ const [isPlaying, setIsPlaying] = useState(false);
const lastQuestionRef = useRef("");
const [isLoading, setIsLoading] = useState(false);
    const [error, setError] = useState<unknown>();
    const [answer, setAnswer] = useState<ChatAppResponse>();
- const [speechUrl, setSpeechUrl] = useState(null);
+ // For the Ask tab, this array will hold a maximum of one URL
+ const [speechUrls, setSpeechUrls] = useState<(string | null)[]>([]);
+
+ const speechConfig: SpeechConfig = {
+ speechUrls,
+ setSpeechUrls,
+ audio,
+ isPlaying,
+ setIsPlaying
+ };
    const [activeCitation, setActiveCitation] = useState<string>();
    const [activeAnalysisPanelTab, setActiveAnalysisPanelTab] = useState<AnalysisPanelTabs | undefined>(undefined);
@@ -82,14 +93,6 @@ export function Component(): JSX.Element {
getConfig();
}, []);
- useEffect(() => {
- if (answer && showSpeechOutputAzure) {
- getSpeechApi(answer.message.content).then(speechUrl => {
- setSpeechUrl(speechUrl);
- });
- }
- }, [answer]);
-
const makeApiRequest = async (question: string) => {
lastQuestionRef.current = question;
@@ -134,7 +137,7 @@ export function Component(): JSX.Element {
};
const result = await askApi(request, token);
setAnswer(result);
- setSpeechUrl(null);
+ setSpeechUrls([null]);
} catch (e) {
setError(e);
} finally {
@@ -256,13 +259,14 @@ export function Component(): JSX.Element {
                        <Answer
                            answer={answer}
+                           index={0}
+                           speechConfig={speechConfig}
                            isStreaming={false}
                            onCitationClicked={x => onShowCitation(x)}
onThoughtProcessClicked={() => onToggleTab(AnalysisPanelTabs.ThoughtProcessTab)}
onSupportingContentClicked={() => onToggleTab(AnalysisPanelTabs.SupportingContentTab)}
showSpeechOutputAzure={showSpeechOutputAzure}
showSpeechOutputBrowser={showSpeechOutputBrowser}
- speechUrl={speechUrl}
/>
)}
diff --git a/app/frontend/src/pages/chat/Chat.tsx b/app/frontend/src/pages/chat/Chat.tsx
index 99725263ab..248788c5b6 100644
--- a/app/frontend/src/pages/chat/Chat.tsx
+++ b/app/frontend/src/pages/chat/Chat.tsx
@@ -9,14 +9,14 @@ import styles from "./Chat.module.css";
import {
chatApi,
configApi,
- getSpeechApi,
RetrievalMode,
ChatAppResponse,
ChatAppResponseOrError,
ChatAppRequest,
ResponseMessage,
VectorFieldOptions,
- GPT4VInput
+ GPT4VInput,
+ SpeechConfig
} from "../../api";
import { Answer, AnswerError, AnswerLoading } from "../../components/Answer";
import { QuestionInput } from "../../components/QuestionInput";
@@ -77,6 +77,16 @@ const Chat = () => {
const [showSpeechInput, setShowSpeechInput] = useState(false);
const [showSpeechOutputBrowser, setShowSpeechOutputBrowser] = useState(false);
const [showSpeechOutputAzure, setShowSpeechOutputAzure] = useState(false);
+ const audio = useRef(new Audio()).current;
+ const [isPlaying, setIsPlaying] = useState(false);
+
+ const speechConfig: SpeechConfig = {
+ speechUrls,
+ setSpeechUrls,
+ audio,
+ isPlaying,
+ setIsPlaying
+ };
const getConfig = async () => {
configApi().then(config => {
@@ -199,6 +209,7 @@ const Chat = () => {
}
setAnswers([...answers, [question, parsedResponse as ChatAppResponse]]);
}
+ setSpeechUrls([...speechUrls, null]);
} catch (e) {
setError(e);
} finally {
@@ -212,6 +223,7 @@ const Chat = () => {
setActiveCitation(undefined);
setActiveAnalysisPanelTab(undefined);
setAnswers([]);
+ setSpeechUrls([]);
setStreamedAnswers([]);
setIsLoading(false);
setIsStreaming(false);
@@ -223,19 +235,6 @@ const Chat = () => {
getConfig();
}, []);
- useEffect(() => {
- if (answers && showSpeechOutputAzure) {
- // For each answer that is missing a speech URL, fetch the speech URL
- for (let i = 0; i < answers.length; i++) {
- if (!speechUrls[i]) {
- getSpeechApi(answers[i][1].message.content).then(speechUrl => {
- setSpeechUrls([...speechUrls.slice(0, i), speechUrl, ...speechUrls.slice(i + 1)]);
- });
- }
- }
- }
- }, [answers]);
-
const onPromptTemplateChange = (_ev?: React.FormEvent, newValue?: string) => {
setPromptTemplate(newValue || "");
};
@@ -368,6 +367,8 @@ const Chat = () => {
isStreaming={true}
key={index}
answer={streamedAnswer[1]}
+ index={index}
+ speechConfig={speechConfig}
isSelected={false}
onCitationClicked={c => onShowCitation(c, index)}
onThoughtProcessClicked={() => onToggleTab(AnalysisPanelTabs.ThoughtProcessTab, index)}
@@ -376,7 +377,6 @@ const Chat = () => {
showFollowupQuestions={useSuggestFollowupQuestions && answers.length - 1 === index}
showSpeechOutputAzure={showSpeechOutputAzure}
showSpeechOutputBrowser={showSpeechOutputBrowser}
- speechUrl={speechUrls[index]}
/>
@@ -390,6 +390,8 @@ const Chat = () => {
isStreaming={false}
key={index}
answer={answer[1]}
+ index={index}
+ speechConfig={speechConfig}
isSelected={selectedAnswer === index && activeAnalysisPanelTab !== undefined}
onCitationClicked={c => onShowCitation(c, index)}
onThoughtProcessClicked={() => onToggleTab(AnalysisPanelTabs.ThoughtProcessTab, index)}
@@ -398,7 +400,6 @@ const Chat = () => {
showFollowupQuestions={useSuggestFollowupQuestions && answers.length - 1 === index}
showSpeechOutputAzure={showSpeechOutputAzure}
showSpeechOutputBrowser={showSpeechOutputBrowser}
- speechUrl={speechUrls[index]}
/>