From 23df7051b78d446c89e53091a5ffedc1de349753 Mon Sep 17 00:00:00 2001 From: BAKHRONOV Date: Fri, 18 Oct 2024 14:17:55 +0900 Subject: [PATCH 1/4] Citation issue fix --- app/frontend/src/components/Answer/AnswerParser.tsx | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/app/frontend/src/components/Answer/AnswerParser.tsx b/app/frontend/src/components/Answer/AnswerParser.tsx index 0f73cd549a..a696bfafcb 100644 --- a/app/frontend/src/components/Answer/AnswerParser.tsx +++ b/app/frontend/src/components/Answer/AnswerParser.tsx @@ -6,6 +6,13 @@ type HtmlParsedAnswer = { citations: string[]; }; +// Function to check citation format +// Citation format: AnyFileName.anyExtension +function isValidCitation(citation: string): boolean { + const regex = /^[^\s]+\.[a-zA-Z0-9]+/; + return regex.test(citation); +} + export function parseAnswerToHtml(answer: string, isStreaming: boolean, onCitationClicked: (citationFilePath: string) => void): HtmlParsedAnswer { const citations: string[] = []; @@ -34,6 +41,11 @@ export function parseAnswerToHtml(answer: string, isStreaming: boolean, onCitati return part; } else { let citationIndex: number; + + if (!isValidCitation(part)) { + return `[${part}]`; + } + if (citations.indexOf(part) !== -1) { citationIndex = citations.indexOf(part) + 1; } else { From 2dd1a700a8797df2ddc0bfaef08adfa28ef9905b Mon Sep 17 00:00:00 2001 From: bnodir Date: Sat, 19 Oct 2024 16:40:34 +0900 Subject: [PATCH 2/4] Citation issue fix version2 --- app/frontend/src/components/Answer/Answer.tsx | 3 +- .../src/components/Answer/AnswerParser.tsx | 37 +++++++++++++++---- 2 files changed, 30 insertions(+), 10 deletions(-) diff --git a/app/frontend/src/components/Answer/Answer.tsx b/app/frontend/src/components/Answer/Answer.tsx index e024563acb..e5619e0c33 100644 --- a/app/frontend/src/components/Answer/Answer.tsx +++ b/app/frontend/src/components/Answer/Answer.tsx @@ -43,8 +43,7 @@ export const Answer = ({ showSpeechOutputBrowser }: Props) => { const followupQuestions = answer.context?.followup_questions; - const messageContent = answer.message.content; - const parsedAnswer = useMemo(() => parseAnswerToHtml(messageContent, isStreaming, onCitationClicked), [answer]); + const parsedAnswer = useMemo(() => parseAnswerToHtml(answer, isStreaming, onCitationClicked), [answer]); const { t } = useTranslation(); const sanitizedAnswerHtml = DOMPurify.sanitize(parsedAnswer.answerHtml); diff --git a/app/frontend/src/components/Answer/AnswerParser.tsx b/app/frontend/src/components/Answer/AnswerParser.tsx index a696bfafcb..575f5b191c 100644 --- a/app/frontend/src/components/Answer/AnswerParser.tsx +++ b/app/frontend/src/components/Answer/AnswerParser.tsx @@ -1,23 +1,44 @@ import { renderToStaticMarkup } from "react-dom/server"; -import { getCitationFilePath } from "../../api"; +import { ChatAppResponse, getCitationFilePath } from "../../api"; type HtmlParsedAnswer = { answerHtml: string; citations: string[]; }; -// Function to check citation format -// Citation format: AnyFileName.anyExtension -function isValidCitation(citation: string): boolean { +// Function to validate citation format and check if it is a valid citation within the context +function isCitationValid(contextDataPoints: any, citationCandidate: string): boolean { const regex = /^[^\s]+\.[a-zA-Z0-9]+/; - return regex.test(citation); + if (!regex.test(citationCandidate)) { + return false; + } + + // Check if contextDataPoints is an object with a text property that is an array + let dataPointsArray: string[]; + if (Array.isArray(contextDataPoints)) { + dataPointsArray = contextDataPoints; + } else if (contextDataPoints && Array.isArray(contextDataPoints.text)) { + dataPointsArray = contextDataPoints.text; + } else { + return false; + } + + // Check if the citation is included in any of the strings within the text array + const isValidCitation = dataPointsArray.some(dataPoint => dataPoint.includes(citationCandidate)); + + if (!isValidCitation) { + return false; + } + + return true; } -export function parseAnswerToHtml(answer: string, isStreaming: boolean, onCitationClicked: (citationFilePath: string) => void): HtmlParsedAnswer { +export function parseAnswerToHtml(answer: ChatAppResponse, isStreaming: boolean, onCitationClicked: (citationFilePath: string) => void): HtmlParsedAnswer { + const contextDataPoints = answer.context.data_points; const citations: string[] = []; // trim any whitespace from the end of the answer after removing follow-up questions - let parsedAnswer = answer.trim(); + let parsedAnswer = answer.message.content.trim(); // Omit a citation that is still being typed during streaming if (isStreaming) { @@ -42,7 +63,7 @@ export function parseAnswerToHtml(answer: string, isStreaming: boolean, onCitati } else { let citationIndex: number; - if (!isValidCitation(part)) { + if (!isCitationValid(contextDataPoints, part)) { return `[${part}]`; } From 3c0c7012f8a6120bd7bc5ac74a3bd6ac5ce727c0 Mon Sep 17 00:00:00 2001 From: BAKHRONOV Date: Tue, 22 Oct 2024 20:40:32 +0900 Subject: [PATCH 3/4] Use startsWith --- app/frontend/src/components/Answer/AnswerParser.tsx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/app/frontend/src/components/Answer/AnswerParser.tsx b/app/frontend/src/components/Answer/AnswerParser.tsx index 575f5b191c..4f351fc1a0 100644 --- a/app/frontend/src/components/Answer/AnswerParser.tsx +++ b/app/frontend/src/components/Answer/AnswerParser.tsx @@ -23,8 +23,8 @@ function isCitationValid(contextDataPoints: any, citationCandidate: string): boo return false; } - // Check if the citation is included in any of the strings within the text array - const isValidCitation = dataPointsArray.some(dataPoint => dataPoint.includes(citationCandidate)); + // Check if dataPoint starts with citation + const isValidCitation = dataPointsArray.some(dataPoint => dataPoint.startsWith(citationCandidate)); if (!isValidCitation) { return false; From 63f5494c5798dd4c50056731bf47d282d6fa3ea9 Mon Sep 17 00:00:00 2001 From: BAKHRONOV Date: Tue, 22 Oct 2024 20:49:19 +0900 Subject: [PATCH 4/4] Update comments --- app/frontend/src/components/Answer/AnswerParser.tsx | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/app/frontend/src/components/Answer/AnswerParser.tsx b/app/frontend/src/components/Answer/AnswerParser.tsx index 4f351fc1a0..76d186d7c9 100644 --- a/app/frontend/src/components/Answer/AnswerParser.tsx +++ b/app/frontend/src/components/Answer/AnswerParser.tsx @@ -6,7 +6,7 @@ type HtmlParsedAnswer = { citations: string[]; }; -// Function to validate citation format and check if it is a valid citation within the context +// Function to validate citation format and check if dataPoint starts with possible citation function isCitationValid(contextDataPoints: any, citationCandidate: string): boolean { const regex = /^[^\s]+\.[a-zA-Z0-9]+/; if (!regex.test(citationCandidate)) { @@ -23,7 +23,6 @@ function isCitationValid(contextDataPoints: any, citationCandidate: string): boo return false; } - // Check if dataPoint starts with citation const isValidCitation = dataPointsArray.some(dataPoint => dataPoint.startsWith(citationCandidate)); if (!isValidCitation) { @@ -37,7 +36,7 @@ export function parseAnswerToHtml(answer: ChatAppResponse, isStreaming: boolean, const contextDataPoints = answer.context.data_points; const citations: string[] = []; - // trim any whitespace from the end of the answer after removing follow-up questions + // Trim any whitespace from the end of the answer after removing follow-up questions let parsedAnswer = answer.message.content.trim(); // Omit a citation that is still being typed during streaming