Skip to content

Commit 8ba2e42

Browse files
committed
webui : Revised LaTeX formula recognition
1 parent e60f241 commit 8ba2e42

File tree

2 files changed

+148
-1
lines changed

2 files changed

+148
-1
lines changed

tools/server/webui/src/lib/components/app/misc/MarkdownContent.svelte

Lines changed: 138 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -154,9 +154,146 @@
154154
return mutated ? tempDiv.innerHTML : html;
155155
}
156156
157+
// See also:
// https://github.com/danny-avila/LibreChat/blob/main/client/src/utils/latex.ts

// Protect code blocks: ```...``` and `...`
const codeBlockRegex = /(```[\s\S]*?```|`[^`\n]+`)/g;

/**
 * Rewrites LaTeX delimiters in markdown text before it is passed to the markdown processor.
 *
 * The text goes through these stages:
 *  1. Code (fenced blocks and inline code spans) is swapped for `<<CODE_BLOCK_n>>` placeholders.
 *  2. Existing math (`$$...$$`, `\[...\]`, `\(...\)`, and inline `$...$` that does not look
 *     like money) is swapped for `<<LATEX_n>>` placeholders.
 *  3. Every remaining, not yet escaped `$` directly followed by a digit is escaped as `\$`.
 *  4. Math placeholders are restored.
 *  5. `\[...\]` / `\(...\)` are converted to `$$...$$` / `$...$` and mhchem commands are escaped.
 *  6. Code placeholders are restored last, so none of the rewrites above touch code.
 *
 * @param content Raw markdown text.
 * @returns The text with delimiters rewritten and currency dollars escaped.
 */
export function preprocessLaTeX(content: string): string {
	// Step 1: Protect code blocks
	const codeBlocks: string[] = [];
	content = content.replace(codeBlockRegex, (match) => {
		codeBlocks.push(match);
		return `<<CODE_BLOCK_${codeBlocks.length - 1}>>`;
	});

	// Step 2: Protect existing LaTeX expressions
	const latexExpressions: string[] = [];

	// Match \(...\), \[...\], $$...$$ and protect them
	content = content.replace(/(\$\$[\s\S]*?\$\$|\\\[[\s\S]*?\\\]|\\\(.*?\\\))/g, (match) => {
		latexExpressions.push(match);
		return `<<LATEX_${latexExpressions.length - 1}>>`;
	});

	// Protect inline $...$ but NOT if it looks like money (e.g., $10, $3.99)
	content = protectLaTeXButNotMoney(content, latexExpressions);

	// Step 3: Escape standalone $ before digits (currency like $5 → \$5).
	// Inline math is protected at this point, so only unprotected dollars are affected.
	// The lookbehind skips dollars that are already escaped, so `\$5` is not turned into `\\$5`.
	// A replacer function is used so the `$` in the replacement is never read as a pattern.
	content = content.replace(/(?<!\\)\$(?=\d)/g, () => '\\$');

	// Step 4: Restore protected LaTeX expressions (they are valid).
	// An inline `$...$` span collected by protectLaTeXButNotMoney may itself contain a
	// placeholder created by the regex above, so restoration runs twice. Unknown indices
	// (placeholder-like text typed by the user) are left untouched instead of becoming
	// the string "undefined".
	const restoreLatex = (text: string): string =>
		text.replace(
			/<<LATEX_(\d+)>>/g,
			(placeholder: string, index: string) => latexExpressions[Number(index)] ?? placeholder
		);
	content = restoreLatex(restoreLatex(content));

	// Step 5: Apply additional escaping functions (brackets and mhchem).
	// This happens while code is still hidden behind placeholders, so code spans keep
	// `\(...\)`, `\[...\]` and `$\ce{...}` verbatim. escapeBrackets performs the
	// \(...\) → $...$ and \[...\] → $$...$$ conversion.
	content = escapeBrackets(content);
	if (content.includes('\\ce{') || content.includes('\\pu{')) {
		content = escapeMhchem(content);
	}

	// Step 6: Restore code blocks
	content = content.replace(
		/<<CODE_BLOCK_(\d+)>>/g,
		(placeholder: string, index: string) => codeBlocks[Number(index)] ?? placeholder
	);

	return content;
}
210+
211+
/**
 * Replaces inline `$...$` math with `<<LATEX_n>>` placeholders, one line at a time,
 * while leaving dollar signs that look like currency in place.
 *
 * For each pair of consecutive `$` on a line, the opening `$` is NOT treated as math when:
 *  - the character directly before it is a letter, digit, `_`, `$` or `-`, or
 *  - it is directly followed by a digit and the closing `$` is directly followed by a
 *    letter, digit, `_`, `$` or `-` (e.g. `$10, $3.99`).
 * Otherwise the span from the opening to the closing `$` (inclusive) is pushed onto
 * `latexExpressions` and replaced by a placeholder carrying its index.
 *
 * @param content Text to scan; spans never cross a newline.
 * @param latexExpressions Collector that is appended to (mutated) for every protected span.
 * @returns The text with protected spans replaced by placeholders; all other text is kept.
 */
function protectLaTeXButNotMoney(content: string, latexExpressions: string[]): string {
	if (!content.includes('$')) {
		return content;
	}

	// Hoisted so the patterns are not rebuilt for every `$` that is inspected.
	const wordLikeChar = /[A-Za-z0-9_$-]/;
	const digitChar = /[0-9]/;

	return content
		.split('\n')
		.map((line) => {
			if (!line.includes('$')) {
				return line;
			}

			let result = '';
			let index = 0;
			// Scan to the very end of the line: every path below either appends the
			// remaining text and stops, or appends what it consumed and advances `index`,
			// so no trailing characters are dropped.
			while (index < line.length) {
				const openIndex = line.indexOf('$', index);
				if (openIndex === -1) {
					result += line.slice(index);
					break;
				}

				// Is there a next $-sign?
				const nextIndex = line.indexOf('$', openIndex + 1);
				if (nextIndex === -1) {
					result += line.slice(index);
					break;
				}

				// charAt yields '' when the position is outside the line.
				const beforeOpenChar = line.charAt(openIndex - 1);
				const afterOpenChar = line.charAt(openIndex + 1);
				const afterCloseChar = line.charAt(nextIndex + 1);

				if (wordLikeChar.test(beforeOpenChar)) {
					// character, digit, $, _ or - before first '$', no TeX.
					result += line.slice(index, openIndex + 1);
					index = openIndex + 1;
					continue;
				}
				if (digitChar.test(afterOpenChar) && wordLikeChar.test(afterCloseChar)) {
					// First $ seems to belong to an amount.
					result += line.slice(index, openIndex + 1);
					index = openIndex + 1;
					continue;
				}

				// Treat as LaTeX
				result += line.slice(index, openIndex);
				latexExpressions.push(line.slice(openIndex, nextIndex + 1));
				result += `<<LATEX_${latexExpressions.length - 1}>>`;
				index = nextIndex + 1;
			}
			return result;
		})
		.join('\n');
}
264+
265+
/**
 * Converts bracket-style math delimiters to dollar-style ones:
 * `\[...\]` becomes `$$...$$` and `\(...\)` becomes `$...$`.
 *
 * Fenced code blocks and inline code spans are matched first and returned unchanged,
 * so delimiters inside code are not converted.
 *
 * An opener that is itself preceded by a backslash (for example the LaTeX line break
 * `\\[4pt]`) is not treated as a math opener. This mirrors the existing rule for the
 * closer, where `\]` directly preceded by a backslash does not close a display block.
 *
 * @param text Markdown text.
 * @returns The text with bracket delimiters converted.
 */
function escapeBrackets(text: string): string {
	const pattern =
		/(```[\S\s]*?```|`.*?`)|(?<!\\)\\\[([\S\s]*?[^\\])\\]|(?<!\\)\\\((.*?)\\\)/g;
	return text.replace(
		pattern,
		(
			match: string,
			codeBlock: string | undefined,
			squareBracket: string | undefined,
			roundBracket: string | undefined
		): string => {
			if (codeBlock != null) {
				// Code is passed through untouched.
				return codeBlock;
			}
			if (squareBracket != null) {
				return '$$' + squareBracket + '$$';
			}
			if (roundBracket != null) {
				return '$' + roundBracket + '$';
			}
			return match;
		}
	);
}
286+
287+
/**
 * Doubles the backslash of mhchem commands that directly follow a dollar sign:
 * `$\ce{` becomes `$\\ce{` and `$\pu{` becomes `$\\pu{`.
 * Occurrences not directly preceded by `$` are left as they are.
 */
function escapeMhchem(text: string): string {
	const withCeEscaped = text.split('$\\ce{').join('$\\\\ce{');
	return withCeEscaped.split('$\\pu{').join('$\\\\pu{');
}
291+
157292
async function processMarkdown(text: string): Promise<string> {
158293
try {
159-
const result = await processor().process(text);
294+
const processedText = preprocessLaTeX(text);
295+
296+
const result = await processor().process(processedText);
160297
const html = String(result);
161298
const enhancedLinks = enhanceLinks(html);
162299

tools/server/webui/src/stories/fixtures/math-formulas.ts

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -150,6 +150,16 @@ $$\lim_{x \to 0} \frac{\sin x}{x} = 1$$
150150
151151
$$\lim_{n \to \infty} \left(1 + \frac{x}{n}\right)^n = e^x$$
152152
153+
## Further Bracket Styles
154+
155+
- \( \mathrm{GL}_2(\mathbb{F}_7) \): Group of invertible matrices with entries in \(\mathbb{F}_7\).
156+
- Some kernel of \(\mathrm{SL}_2(\mathbb{F}_7)\):
157+
\[
158+
\left\{ \begin{pmatrix} 1 & 0 \\ 0 & 1 \end{pmatrix}, \begin{pmatrix} -1 & 0 \\ 0 & -1 \end{pmatrix} \right\} = \{\pm I\}
159+
\]
160+
- $100 and $12.99 are amounts, not LaTeX.
161+
- I have $10, $3.99 and $x + y$ and $100x$. The amount is $2,000.
162+
153163
---
154164
155165
*This document showcases various mathematical notation and formulas that can be rendered in markdown using LaTeX syntax.*

0 commit comments

Comments
 (0)