Skip to content

Commit 124df6e

Browse files
committed
improve MarkdownDisplay
1 parent 71235f6 commit 124df6e

File tree

4 files changed

+197
-135
lines changed

4 files changed

+197
-135
lines changed
75 Bytes
Binary file not shown.

examples/server/webui/public/demo-conversation.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
{
1212
"id": 1734087548327,
1313
"role": "assistant",
14-
"content": "This is the formula:\n\n$\\frac{e^{x_i}}{\\sum_{j=1}^{n}e^{x_j}}$\n\nGiven an input vector \\(\\mathbf{x} = [x_1, x_2, \\ldots, x_n]\\)\n\n\\[\ny_i = \\frac{e^{x_i}}{\\sum_{j=1}^n e^{x_j}}\n\\]\n\nCode block latex:\n```latex\n\\frac{e^{x_i}}{\\sum_{j=1}^{n}e^{x_j}}\n```\n\nTest dollar sign: $1234 $4567\n\nInvalid latex syntax: $E = mc^$ and $$E = mc^$$",
14+
"content": "This is the formula:\n\n$\\frac{e^{x_i}}{\\sum_{j=1}^{n}e^{x_j}}$\n\nGiven an input vector \\(\\mathbf{x} = [x_1, x_2, \\ldots, x_n]\\)\n\n\\[\ny_i = \\frac{e^{x_i}}{\\sum_{j=1}^n e^{x_j}}\n\\]\n\n$2x + y = z$\n\nCode block latex:\n```latex\n\\frac{e^{x_i}}{\\sum_{j=1}^{n}e^{x_j}}\n```\n\nTest dollar sign: $1234 $4567\n\nInvalid latex syntax: $E = mc^$ and $$E = mc^$$",
1515
"timings": {
1616
"prompt_n": 1,
1717
"prompt_ms": 28.923,

examples/server/webui/src/components/ChatMessage.tsx

Lines changed: 3 additions & 134 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,8 @@
1-
import React, { useMemo, useState } from 'react';
1+
import { useMemo, useState } from 'react';
22
import { useAppContext } from '../utils/app.context';
33
import { Message, PendingMessage } from '../utils/types';
4-
import { classNames, copyStr } from '../utils/misc';
5-
import Markdown, { ExtraProps } from 'react-markdown';
6-
import remarkGfm from 'remark-gfm';
7-
import rehypeHightlight from 'rehype-highlight';
8-
import rehypeKatex from 'rehype-katex';
9-
import remarkMath from 'remark-math';
10-
import remarkBreaks from 'remark-breaks';
11-
import 'katex/dist/katex.min.css';
4+
import { classNames } from '../utils/misc';
5+
import MarkdownDisplay from './MarkdownDisplay';
126

137
interface SplitMessage {
148
content: PendingMessage['content'];
@@ -234,128 +228,3 @@ export default function ChatMessage({
234228
</div>
235229
);
236230
}
237-
238-
const Pre: React.ElementType<
239-
React.ClassAttributes<HTMLPreElement> &
240-
React.HTMLAttributes<HTMLPreElement> &
241-
ExtraProps & { origContent: string }
242-
> = ({ node, origContent, ...props }) => {
243-
const startOffset = node?.position?.start.offset ?? 0;
244-
const endOffset = node?.position?.end.offset ?? 0;
245-
246-
const [copied, setCopied] = useState(false);
247-
const copiedContent = useMemo(
248-
() =>
249-
origContent
250-
.substring(startOffset, endOffset)
251-
.replace(/^```[^\n]+\n/g, '')
252-
.replace(/```$/g, ''),
253-
[origContent, startOffset, endOffset]
254-
);
255-
256-
if (!node?.position) {
257-
return <pre {...props} />;
258-
}
259-
260-
return (
261-
<div className="relative my-4">
262-
<div
263-
className="text-right sticky top-4 mb-2 mr-2 h-0"
264-
onClick={() => {
265-
copyStr(copiedContent);
266-
setCopied(true);
267-
}}
268-
onMouseLeave={() => setCopied(false)}
269-
>
270-
<button className="badge btn-mini">
271-
{copied ? 'Copied!' : '📋 Copy'}
272-
</button>
273-
</div>
274-
<pre {...props} />
275-
</div>
276-
);
277-
};
278-
279-
function MarkdownDisplay({ content }: { content: string }) {
280-
const preprocessedContent = useMemo(
281-
() => preprocessLaTeX(content),
282-
[content]
283-
);
284-
return (
285-
<Markdown
286-
remarkPlugins={[remarkGfm, remarkMath, remarkBreaks]}
287-
rehypePlugins={[rehypeHightlight, rehypeKatex]}
288-
components={{
289-
pre: (props) => <Pre {...props} origContent={preprocessedContent} />,
290-
}}
291-
>
292-
{preprocessedContent}
293-
</Markdown>
294-
);
295-
}
296-
297-
/**
298-
* Preprocesses LaTeX content by replacing delimiters and escaping certain characters.
299-
* Ref: https://github.com/remarkjs/react-markdown/issues/785
300-
*/
301-
export function preprocessLaTeX(content: string): string {
302-
// Step 1: Protect code blocks
303-
const codeBlocks: string[] = [];
304-
content = content.replace(/(```[\s\S]*?```|`[^`\n]+`)/g, (_, code) => {
305-
codeBlocks.push(code);
306-
return `<<CODE_BLOCK_${codeBlocks.length - 1}>>`;
307-
});
308-
309-
// Step 2: Protect existing LaTeX expressions
310-
const latexExpressions: string[] = [];
311-
content = content.replace(
312-
/(\$\$[\s\S]*?\$\$|\\\[[\s\S]*?\\\]|\\\(.*?\\\))/g,
313-
(match) => {
314-
latexExpressions.push(match);
315-
return `<<LATEX_${latexExpressions.length - 1}>>`;
316-
}
317-
);
318-
319-
// Step 3: Escape dollar signs that are likely currency indicators
320-
content = content.replace(/\$(?=\d)/g, '\\$');
321-
322-
// Step 4: Restore LaTeX expressions
323-
content = content.replace(
324-
/<<LATEX_(\d+)>>/g,
325-
(_, index) => latexExpressions[parseInt(index)]
326-
);
327-
328-
// Step 5: Restore code blocks
329-
content = content.replace(
330-
/<<CODE_BLOCK_(\d+)>>/g,
331-
(_, index) => codeBlocks[parseInt(index)]
332-
);
333-
334-
// Step 6: Apply additional escaping functions
335-
content = escapeBrackets(content);
336-
content = escapeMhchem(content);
337-
338-
return content;
339-
}
340-
341-
function escapeBrackets(text: string) {
342-
const pattern =
343-
/(```[\S\s]*?```|`.*?`)|\\\[([\S\s]*?[^\\])\\]|\\\((.*?)\\\)/g;
344-
return text.replaceAll(
345-
pattern,
346-
(match, codeBlock, squareBracket, roundBracket) => {
347-
if (codeBlock) {
348-
return codeBlock;
349-
} else if (squareBracket) {
350-
return `$$${squareBracket}$$`;
351-
} else if (roundBracket) {
352-
return `$${roundBracket}$`;
353-
}
354-
return match;
355-
}
356-
);
357-
}
358-
359-
function escapeMhchem(text: string) {
360-
return text.replaceAll('$\\ce{', '$\\\\ce{').replaceAll('$\\pu{', '$\\\\pu{');
361-
}
Lines changed: 193 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,193 @@
1+
import React, { useMemo, useState } from 'react';
2+
import Markdown, { ExtraProps } from 'react-markdown';
3+
import remarkGfm from 'remark-gfm';
4+
import rehypeHightlight from 'rehype-highlight';
5+
import rehypeKatex from 'rehype-katex';
6+
import remarkMath from 'remark-math';
7+
import remarkBreaks from 'remark-breaks';
8+
import 'katex/dist/katex.min.css';
9+
import { copyStr } from '../utils/misc';
10+
11+
/**
 * Renders a message string as Markdown.
 *
 * The raw text is first passed through `preprocessLaTeX`, then handed to
 * react-markdown with the GFM, math and line-break remark plugins and the
 * highlight and KaTeX rehype plugins. `<pre>` elements are rendered through
 * the local `Pre` component, which also receives the preprocessed source text.
 *
 * @param content Raw message text.
 */
export default function MarkdownDisplay({ content }: { content: string }) {
  const preprocessedContent = useMemo(
    () => preprocessLaTeX(content),
    [content]
  );

  // Keep the identity of the `pre` override stable while the text is unchanged.
  // An inline arrow function here would be a brand-new component type on every
  // render, which makes React unmount and remount every code block (dropping
  // its local state) whenever the parent re-renders.
  const components = useMemo(
    () => ({
      pre: (props: React.ComponentProps<'pre'> & ExtraProps) => (
        <Pre {...props} origContent={preprocessedContent} />
      ),
    }),
    [preprocessedContent]
  );

  return (
    <Markdown
      remarkPlugins={[remarkGfm, remarkMath, remarkBreaks]}
      rehypePlugins={[rehypeHightlight, rehypeKatex]}
      components={components}
    >
      {preprocessedContent}
    </Markdown>
  );
}
28+
29+
/**
 * `<pre>` renderer that adds a "Copy" button above the code block.
 *
 * The text to copy is sliced out of `origContent` using the source offsets
 * recorded on `node.position`, then stripped of its opening and closing
 * ``` fences. When the node carries no position information a plain `<pre>`
 * is rendered without the button.
 */
const Pre: React.ElementType<
  React.ClassAttributes<HTMLPreElement> &
    React.HTMLAttributes<HTMLPreElement> &
    ExtraProps & { origContent: string }
> = ({ node, origContent, ...props }) => {
  const startOffset = node?.position?.start.offset ?? 0;
  const endOffset = node?.position?.end.offset ?? 0;

  // Hooks are called before the early return so their order never changes.
  const [copied, setCopied] = useState(false);
  const copiedContent = useMemo(
    () =>
      origContent
        .substring(startOffset, endOffset)
        // `[^\n]*` (not `+`): the language tag after the opening fence is
        // optional, and a bare "```" line must be stripped as well.
        .replace(/^```[^\n]*\n/, '')
        .replace(/```$/, ''),
    [origContent, startOffset, endOffset]
  );

  if (!node?.position) {
    return <pre {...props} />;
  }

  return (
    <div className="relative my-4">
      <div
        className="text-right sticky top-4 mb-2 mr-2 h-0"
        onClick={() => {
          copyStr(copiedContent);
          setCopied(true);
        }}
        // Reset the label once the pointer leaves the button area.
        onMouseLeave={() => setCopied(false)}
      >
        <button className="badge btn-mini">
          {copied ? 'Copied!' : '📋 Copy'}
        </button>
      </div>
      <pre {...props} />
    </div>
  );
};
69+
70+
/**
71+
* The part below is copied and adapted from:
72+
* https://github.com/danny-avila/LibreChat/blob/main/client/src/utils/latex.ts
73+
* (MIT License)
74+
*/
75+
76+
// Cheap pre-check for anything that looks like LaTeX. The `s` flag lets `.`
// cross line breaks so that multi-line `\[ ... \]` blocks (which `blockLatex`
// below is able to convert) are not rejected by this check.
const containsLatexRegex =
  /\\\(.*?\\\)|\\\[.*?\\\]|\$.*?\$|\\begin\{equation\}.*?\\end\{equation\}/s;

// Inline `\( ... \)` (single line, non-empty) and block `\[ ... \]` (may span
// lines; the last character before `\]` must not be a backslash).
const inlineLatex = /\\\((.+?)\\\)/g;
const blockLatex = /\\\[(.*?[^\\])\\\]/gs;

/**
 * Puts protected code snippets back in place of their placeholders.
 *
 * @param content Text containing `<<CODE_BLOCK_n>>` placeholders.
 * @param codeBlocks Snippets indexed by placeholder number.
 * @returns The text with every known placeholder replaced; a placeholder whose
 *   index has no stored snippet is left untouched.
 */
const restoreCodeBlocks = (content: string, codeBlocks: string[]): string =>
  content.replace(
    /<<CODE_BLOCK_(\d+)>>/g,
    // The capture group is a string: convert it before indexing the array.
    (placeholder: string, index: string) =>
      codeBlocks[Number(index)] ?? placeholder
  );

// Fenced code blocks and inline code spans.
const codeBlockRegex = /(```[\s\S]*?```|`.*?`)/g;

/**
 * Rewrites `\( ... \)` and `\[ ... \]` LaTeX delimiters to `$ ... $` and
 * `$$ ... $$`, and escapes dollar signs that precede a digit (optionally after
 * one whitespace character). Fenced code blocks and inline code are left
 * untouched.
 *
 * @param _content Raw text.
 * @returns The converted text.
 */
export const processLaTeX = (_content: string): string => {
  // Swap code for placeholders so the steps below cannot alter it.
  const codeBlocks: string[] = [];
  const content = _content.replace(codeBlockRegex, (match) => {
    codeBlocks.push(match);
    return `<<CODE_BLOCK_${codeBlocks.length - 1}>>`;
  });

  // Escape dollar signs followed by a digit or by whitespace and a digit.
  let processedContent = content.replace(/(\$)(?=\s?\d)/g, '\\$');

  // Nothing that looks like LaTeX: only the code needs to be restored.
  if (!containsLatexRegex.test(processedContent)) {
    return restoreCodeBlocks(processedContent, codeBlocks);
  }

  // Function replacers are used so `$` in the output is never interpreted as a
  // replacement pattern.
  processedContent = processedContent
    .replace(inlineLatex, (_: string, equation: string) => `$${equation}$`)
    .replace(blockLatex, (_: string, equation: string) => `$$${equation}$$`);

  return restoreCodeBlocks(processedContent, codeBlocks);
};
121+
122+
/**
 * Prepares message text for the Markdown/math renderer.
 *
 * Dollar signs that directly precede a digit are escaped, while code spans,
 * fenced code and already-delimited LaTeX (`$$…$$`, `\[…\]`, `\(…\)`) are
 * shielded from that escaping. The result is then passed through
 * `escapeBrackets` and `escapeMhchem`.
 *
 * @param content The input string containing LaTeX expressions.
 * @returns The processed string with replaced delimiters and escaped characters.
 */
export function preprocessLaTeX(content: string): string {
  const stashedCode: string[] = [];
  const stashedMath: string[] = [];

  // Hide fenced code and inline code behind numbered placeholders.
  const codeHidden = content.replace(
    /(```[\s\S]*?```|`[^`\n]+`)/g,
    (_, snippet) => {
      stashedCode.push(snippet);
      return `<<CODE_BLOCK_${stashedCode.length - 1}>>`;
    }
  );

  // Hide existing LaTeX expressions the same way.
  const mathHidden = codeHidden.replace(
    /(\$\$[\s\S]*?\$\$|\\\[[\s\S]*?\\\]|\\\(.*?\\\))/g,
    (expression) => {
      stashedMath.push(expression);
      return `<<LATEX_${stashedMath.length - 1}>>`;
    }
  );

  // Whatever `$` is still visible and sits before a digit is treated as a
  // currency sign and escaped.
  const currencyEscaped = mathHidden.replace(/\$(?=\d)/g, '\\$');

  // Bring back the LaTeX first, then the code, mirroring the hiding order.
  const mathRestored = currencyEscaped.replace(
    /<<LATEX_(\d+)>>/g,
    (_, position) => stashedMath[parseInt(position)]
  );
  const codeRestored = mathRestored.replace(
    /<<CODE_BLOCK_(\d+)>>/g,
    (_, position) => stashedCode[parseInt(position)]
  );

  // Final delimiter and mhchem adjustments.
  return escapeMhchem(escapeBrackets(codeRestored));
}
167+
168+
/**
 * Converts bracket-style LaTeX delimiters to dollar-style ones.
 *
 * `\[ … \]` becomes `$$ … $$` and `\( … \)` becomes `$ … $`. Fenced code
 * blocks and inline code spans are matched first and returned unchanged, so
 * delimiters inside them are not rewritten.
 *
 * @param text Text to convert.
 * @returns The converted text.
 */
export function escapeBrackets(text: string): string {
  // Alternatives, in order: code (group 1), `\[…\]` (group 2), `\(…\)` (group 3).
  const delimiterPattern =
    /(```[\S\s]*?```|`.*?`)|\\\[([\S\s]*?[^\\])\\]|\\\((.*?)\\\)/g;

  const rewrite = (
    whole: string,
    code?: string,
    display?: string,
    inline?: string
  ): string => {
    if (code != null) return code;
    if (display != null) return `$$${display}$$`;
    // An empty inline group is still a match, hence the explicit null check.
    return inline != null ? `$${inline}$` : whole;
  };

  return text.replace(delimiterPattern, rewrite);
}
190+
191+
/**
 * Doubles the backslash of mhchem commands that directly follow a dollar
 * sign: `$\ce{` becomes `$\\ce{` and `$\pu{` becomes `$\\pu{`.
 *
 * Implemented with `String.prototype.replace` and a global regex, matching
 * `escapeBrackets`, instead of the ES2021-only `replaceAll`.
 *
 * @param text Text to process.
 * @returns The text with the affected commands escaped.
 */
export function escapeMhchem(text: string): string {
  // A function replacer keeps the leading `$` literal in the output.
  return text.replace(
    /\$\\(ce|pu)\{/g,
    (_: string, command: string) => `$\\\\${command}{`
  );
}

0 commit comments

Comments
 (0)