improve MarkdownDisplay

ngxson · ngxson · commit 124df6e7c91f · 2025-02-06T12:33:52.000+01:00
diff --git a/examples/server/public/index.html.gz b/examples/server/public/index.html.gz
diff --git a/examples/server/webui/public/demo-conversation.json b/examples/server/webui/public/demo-conversation.json
@@ -11,7 +11,7 @@
     {
       "id": 1734087548327,
       "role": "assistant",
-      "content": "This is the formula:\n\n$\\frac{e^{x_i}}{\\sum_{j=1}^{n}e^{x_j}}$\n\nGiven an input vector \\(\\mathbf{x} = [x_1, x_2, \\ldots, x_n]\\)\n\n\\[\ny_i = \\frac{e^{x_i}}{\\sum_{j=1}^n e^{x_j}}\n\\]\n\nCode block latex:\n```latex\n\\frac{e^{x_i}}{\\sum_{j=1}^{n}e^{x_j}}\n```\n\nTest dollar sign: $1234 $4567\n\nInvalid latex syntax: $E = mc^$ and $$E = mc^$$",
+      "content": "This is the formula:\n\n$\\frac{e^{x_i}}{\\sum_{j=1}^{n}e^{x_j}}$\n\nGiven an input vector \\(\\mathbf{x} = [x_1, x_2, \\ldots, x_n]\\)\n\n\\[\ny_i = \\frac{e^{x_i}}{\\sum_{j=1}^n e^{x_j}}\n\\]\n\n$2x + y = z$\n\nCode block latex:\n```latex\n\\frac{e^{x_i}}{\\sum_{j=1}^{n}e^{x_j}}\n```\n\nTest dollar sign: $1234 $4567\n\nInvalid latex syntax: $E = mc^$ and $$E = mc^$$",
       "timings": {
         "prompt_n": 1,
         "prompt_ms": 28.923,
diff --git a/examples/server/webui/src/components/ChatMessage.tsx b/examples/server/webui/src/components/ChatMessage.tsx
@@ -1,14 +1,8 @@
-import React, { useMemo, useState } from 'react';
+import { useMemo, useState } from 'react';
 import { useAppContext } from '../utils/app.context';
 import { Message, PendingMessage } from '../utils/types';
-import { classNames, copyStr } from '../utils/misc';
-import Markdown, { ExtraProps } from 'react-markdown';
-import remarkGfm from 'remark-gfm';
-import rehypeHightlight from 'rehype-highlight';
-import rehypeKatex from 'rehype-katex';
-import remarkMath from 'remark-math';
-import remarkBreaks from 'remark-breaks';
-import 'katex/dist/katex.min.css';
+import { classNames } from '../utils/misc';
+import MarkdownDisplay from './MarkdownDisplay';
 
 interface SplitMessage {
   content: PendingMessage['content'];
@@ -234,128 +228,3 @@ export default function ChatMessage({
     </div>
   );
 }
-
-const Pre: React.ElementType<
-  React.ClassAttributes<HTMLPreElement> &
-    React.HTMLAttributes<HTMLPreElement> &
-    ExtraProps & { origContent: string }
-> = ({ node, origContent, ...props }) => {
-  const startOffset = node?.position?.start.offset ?? 0;
-  const endOffset = node?.position?.end.offset ?? 0;
-
-  const [copied, setCopied] = useState(false);
-  const copiedContent = useMemo(
-    () =>
-      origContent
-        .substring(startOffset, endOffset)
-        .replace(/^```[^\n]+\n/g, '')
-        .replace(/```$/g, ''),
-    [origContent, startOffset, endOffset]
-  );
-
-  if (!node?.position) {
-    return <pre {...props} />;
-  }
-
-  return (
-    <div className="relative my-4">
-      <div
-        className="text-right sticky top-4 mb-2 mr-2 h-0"
-        onClick={() => {
-          copyStr(copiedContent);
-          setCopied(true);
-        }}
-        onMouseLeave={() => setCopied(false)}
-      >
-        <button className="badge btn-mini">
-          {copied ? 'Copied!' : '📋 Copy'}
-        </button>
-      </div>
-      <pre {...props} />
-    </div>
-  );
-};
-
-function MarkdownDisplay({ content }: { content: string }) {
-  const preprocessedContent = useMemo(
-    () => preprocessLaTeX(content),
-    [content]
-  );
-  return (
-    <Markdown
-      remarkPlugins={[remarkGfm, remarkMath, remarkBreaks]}
-      rehypePlugins={[rehypeHightlight, rehypeKatex]}
-      components={{
-        pre: (props) => <Pre {...props} origContent={preprocessedContent} />,
-      }}
-    >
-      {preprocessedContent}
-    </Markdown>
-  );
-}
-
-/**
- * Preprocesses LaTeX content by replacing delimiters and escaping certain characters.
- * Ref: https://github.com/remarkjs/react-markdown/issues/785
- */
-export function preprocessLaTeX(content: string): string {
-  // Step 1: Protect code blocks
-  const codeBlocks: string[] = [];
-  content = content.replace(/(```[\s\S]*?```|`[^`\n]+`)/g, (_, code) => {
-    codeBlocks.push(code);
-    return `<<CODE_BLOCK_${codeBlocks.length - 1}>>`;
-  });
-
-  // Step 2: Protect existing LaTeX expressions
-  const latexExpressions: string[] = [];
-  content = content.replace(
-    /(\$\$[\s\S]*?\$\$|\\\[[\s\S]*?\\\]|\\\(.*?\\\))/g,
-    (match) => {
-      latexExpressions.push(match);
-      return `<<LATEX_${latexExpressions.length - 1}>>`;
-    }
-  );
-
-  // Step 3: Escape dollar signs that are likely currency indicators
-  content = content.replace(/\$(?=\d)/g, '\\$');
-
-  // Step 4: Restore LaTeX expressions
-  content = content.replace(
-    /<<LATEX_(\d+)>>/g,
-    (_, index) => latexExpressions[parseInt(index)]
-  );
-
-  // Step 5: Restore code blocks
-  content = content.replace(
-    /<<CODE_BLOCK_(\d+)>>/g,
-    (_, index) => codeBlocks[parseInt(index)]
-  );
-
-  // Step 6: Apply additional escaping functions
-  content = escapeBrackets(content);
-  content = escapeMhchem(content);
-
-  return content;
-}
-
-function escapeBrackets(text: string) {
-  const pattern =
-    /(```[\S\s]*?```|`.*?`)|\\\[([\S\s]*?[^\\])\\]|\\\((.*?)\\\)/g;
-  return text.replaceAll(
-    pattern,
-    (match, codeBlock, squareBracket, roundBracket) => {
-      if (codeBlock) {
-        return codeBlock;
-      } else if (squareBracket) {
-        return `$$${squareBracket}$$`;
-      } else if (roundBracket) {
-        return `$${roundBracket}$`;
-      }
-      return match;
-    }
-  );
-}
-
-function escapeMhchem(text: string) {
-  return text.replaceAll('$\\ce{', '$\\\\ce{').replaceAll('$\\pu{', '$\\\\pu{');
-}
diff --git a/examples/server/webui/src/components/MarkdownDisplay.tsx b/examples/server/webui/src/components/MarkdownDisplay.tsx
@@ -0,0 +1,193 @@
+import React, { useMemo, useState } from 'react';
+import Markdown, { ExtraProps } from 'react-markdown';
+import remarkGfm from 'remark-gfm';
+import rehypeHightlight from 'rehype-highlight';
+import rehypeKatex from 'rehype-katex';
+import remarkMath from 'remark-math';
+import remarkBreaks from 'remark-breaks';
+import 'katex/dist/katex.min.css';
+import { copyStr } from '../utils/misc';
+
+export default function MarkdownDisplay({ content }: { content: string }) {
+  const preprocessedContent = useMemo(
+    () => preprocessLaTeX(content),
+    [content]
+  );
+  return (
+    <Markdown
+      remarkPlugins={[remarkGfm, remarkMath, remarkBreaks]}
+      rehypePlugins={[rehypeHightlight, rehypeKatex]}
+      components={{
+        pre: (props) => <Pre {...props} origContent={preprocessedContent} />,
+      }}
+    >
+      {preprocessedContent}
+    </Markdown>
+  );
+}
+
+const Pre: React.ElementType<
+  React.ClassAttributes<HTMLPreElement> &
+    React.HTMLAttributes<HTMLPreElement> &
+    ExtraProps & { origContent: string }
+> = ({ node, origContent, ...props }) => {
+  const startOffset = node?.position?.start.offset ?? 0;
+  const endOffset = node?.position?.end.offset ?? 0;
+
+  const [copied, setCopied] = useState(false);
+  const copiedContent = useMemo(
+    () =>
+      origContent
+        .substring(startOffset, endOffset)
+        .replace(/^```[^\n]+\n/g, '')
+        .replace(/```$/g, ''),
+    [origContent, startOffset, endOffset]
+  );
+
+  if (!node?.position) {
+    return <pre {...props} />;
+  }
+
+  return (
+    <div className="relative my-4">
+      <div
+        className="text-right sticky top-4 mb-2 mr-2 h-0"
+        onClick={() => {
+          copyStr(copiedContent);
+          setCopied(true);
+        }}
+        onMouseLeave={() => setCopied(false)}
+      >
+        <button className="badge btn-mini">
+          {copied ? 'Copied!' : '📋 Copy'}
+        </button>
+      </div>
+      <pre {...props} />
+    </div>
+  );
+};
+
+/**
+ * The part below is copied and adapted from:
+ * https://github.com/danny-avila/LibreChat/blob/main/client/src/utils/latex.ts
+ * (MIT License)
+ */
+
+// Regex to check if the processed content contains any potential LaTeX patterns
+const containsLatexRegex =
+  /\\\(.*?\\\)|\\\[.*?\\\]|\$.*?\$|\\begin\{equation\}.*?\\end\{equation\}/;
+
+// Regex for inline and block LaTeX expressions
+const inlineLatex = new RegExp(/\\\((.+?)\\\)/, 'g');
+const blockLatex = new RegExp(/\\\[(.*?[^\\])\\\]/, 'gs');
+
+// Function to restore code blocks
+const restoreCodeBlocks = (content: string, codeBlocks: string[]) => {
+  return content.replace(
+    /<<CODE_BLOCK_(\d+)>>/g,
+    (_, index) => codeBlocks[index]
+  );
+};
+
+// Regex to identify code blocks and inline code
+const codeBlockRegex = /(```[\s\S]*?```|`.*?`)/g;
+
+export const processLaTeX = (_content: string) => {
+  let content = _content;
+  // Temporarily replace code blocks and inline code with placeholders
+  const codeBlocks: string[] = [];
+  let index = 0;
+  content = content.replace(codeBlockRegex, (match) => {
+    codeBlocks[index] = match;
+    return `<<CODE_BLOCK_${index++}>>`;
+  });
+
+  // Escape dollar signs followed by a digit or space and digit
+  let processedContent = content.replace(/(\$)(?=\s?\d)/g, '\\$');
+
+  // If no LaTeX patterns are found, restore code blocks and return the processed content
+  if (!containsLatexRegex.test(processedContent)) {
+    return restoreCodeBlocks(processedContent, codeBlocks);
+  }
+
+  // Convert LaTeX expressions to a markdown compatible format
+  processedContent = processedContent
+    .replace(inlineLatex, (_: string, equation: string) => `$${equation}$`) // Convert inline LaTeX
+    .replace(blockLatex, (_: string, equation: string) => `$$${equation}$$`); // Convert block LaTeX
+
+  // Restore code blocks
+  return restoreCodeBlocks(processedContent, codeBlocks);
+};
+
+/**
+ * Preprocesses LaTeX content by replacing delimiters and escaping certain characters.
+ *
+ * @param content The input string containing LaTeX expressions.
+ * @returns The processed string with replaced delimiters and escaped characters.
+ */
+export function preprocessLaTeX(content: string): string {
+  // Step 1: Protect code blocks
+  const codeBlocks: string[] = [];
+  content = content.replace(/(```[\s\S]*?```|`[^`\n]+`)/g, (_, code) => {
+    codeBlocks.push(code);
+    return `<<CODE_BLOCK_${codeBlocks.length - 1}>>`;
+  });
+
+  // Step 2: Protect existing LaTeX expressions
+  const latexExpressions: string[] = [];
+  content = content.replace(
+    /(\$\$[\s\S]*?\$\$|\\\[[\s\S]*?\\\]|\\\(.*?\\\))/g,
+    (match) => {
+      latexExpressions.push(match);
+      return `<<LATEX_${latexExpressions.length - 1}>>`;
+    }
+  );
+
+  // Step 3: Escape dollar signs that are likely currency indicators
+  content = content.replace(/\$(?=\d)/g, '\\$');
+
+  // Step 4: Restore LaTeX expressions
+  content = content.replace(
+    /<<LATEX_(\d+)>>/g,
+    (_, index) => latexExpressions[parseInt(index)]
+  );
+
+  // Step 5: Restore code blocks
+  content = content.replace(
+    /<<CODE_BLOCK_(\d+)>>/g,
+    (_, index) => codeBlocks[parseInt(index)]
+  );
+
+  // Step 6: Apply additional escaping functions
+  content = escapeBrackets(content);
+  content = escapeMhchem(content);
+
+  return content;
+}
+
+export function escapeBrackets(text: string): string {
+  const pattern =
+    /(```[\S\s]*?```|`.*?`)|\\\[([\S\s]*?[^\\])\\]|\\\((.*?)\\\)/g;
+  return text.replace(
+    pattern,
+    (
+      match: string,
+      codeBlock: string | undefined,
+      squareBracket: string | undefined,
+      roundBracket: string | undefined
+    ): string => {
+      if (codeBlock != null) {
+        return codeBlock;
+      } else if (squareBracket != null) {
+        return `$$${squareBracket}$$`;
+      } else if (roundBracket != null) {
+        return `$${roundBracket}$`;
+      }
+      return match;
+    }
+  );
+}
+
+export function escapeMhchem(text: string) {
+  return text.replaceAll('$\\ce{', '$\\\\ce{').replaceAll('$\\pu{', '$\\\\pu{');
+}

Original file line number	Diff line number	Diff line change
`@@ -11,7 +11,7 @@`
`11`	`11`	`{`
`12`	`12`	`"id": 1734087548327,`
`13`	`13`	`"role": "assistant",`
`14`		- "content": "This is the formula:\n\n$\\frac{e^{x_i}}{\\sum_{j=1}^{n}e^{x_j}}$\n\nGiven an input vector \\(\\mathbf{x} = [x_1, x_2, \\ldots, x_n]\\)\n\n\\[\ny_i = \\frac{e^{x_i}}{\\sum_{j=1}^n e^{x_j}}\n\\]\n\nCode block latex:\n```latex\n\\frac{e^{x_i}}{\\sum_{j=1}^{n}e^{x_j}}\n```\n\nTest dollar sign: $1234 $4567\n\nInvalid latex syntax: $E = mc^$ and $$E = mc^$$",
	`14`	+ "content": "This is the formula:\n\n$\\frac{e^{x_i}}{\\sum_{j=1}^{n}e^{x_j}}$\n\nGiven an input vector \\(\\mathbf{x} = [x_1, x_2, \\ldots, x_n]\\)\n\n\\[\ny_i = \\frac{e^{x_i}}{\\sum_{j=1}^n e^{x_j}}\n\\]\n\n$2x + y = z$\n\nCode block latex:\n```latex\n\\frac{e^{x_i}}{\\sum_{j=1}^{n}e^{x_j}}\n```\n\nTest dollar sign: $1234 $4567\n\nInvalid latex syntax: $E = mc^$ and $$E = mc^$$",
`15`	`15`	`"timings": {`
`16`	`16`	`"prompt_n": 1,`
`17`	`17`	`"prompt_ms": 28.923,`