Skip to content

Commit c333fcf

Browse files
committed
webui : Revised LaTeX formula recognition
1 parent 66b0dbc commit c333fcf

File tree

2 files changed

+150
-2
lines changed

2 files changed

+150
-2
lines changed

tools/server/webui/src/lib/components/app/misc/MarkdownContent.svelte

Lines changed: 140 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -164,10 +164,148 @@
164164
});
165165
}
166166
167+
// See also:
168+
// https://github.com/danny-avila/LibreChat/blob/main/client/src/utils/latex.ts
169+
170+
// Protect code blocks: ```...``` and `...`
// The first alternative matches a fenced block lazily and may span several lines;
// the second matches a non-empty inline code span that contains no backtick or newline.
// The whole match is captured in group 1 and the `g` flag makes replace() visit every occurrence.
const codeBlockRegex = /(```[\s\S]*?```|`[^`\n]+`)/g;
172+
173+
/**
 * Normalizes LaTeX math delimiters in markdown text before it is rendered.
 *
 * Pipeline:
 *  1. Code blocks and inline code are swapped for `<<CODE_BLOCK_n>>` placeholders.
 *  2. Existing math (`$$...$$`, `\[...\]`, `\(...\)` and inline `$...$` that does not
 *     look like money) is swapped for `<<LATEX_n>>` placeholders.
 *  3. Any remaining `$` directly followed by a digit is escaped as `\$` (currency).
 *  4. Math placeholders are restored.
 *  5. `\(...\)` / `\[...\]` are rewritten to `$...$` / `$$...$$` and mhchem commands
 *     are escaped - while code is still hidden, so code content is never rewritten.
 *  6. Code placeholders are restored last.
 *
 * @param content - Raw markdown text.
 * @returns The text with math delimiters normalized and currency dollars escaped.
 */
export function preprocessLaTeX(content: string): string {
	// Step 1: Protect code blocks
	const codeBlocks: string[] = [];
	content = content.replace(codeBlockRegex, (match) => {
		codeBlocks.push(match);
		return `<<CODE_BLOCK_${codeBlocks.length - 1}>>`;
	});

	// Step 2: Protect existing LaTeX expressions
	const latexExpressions: string[] = [];

	// Match \(...\), \[...\], $$...$$ and protect them
	content = content.replace(/(\$\$[\s\S]*?\$\$|\\\[[\s\S]*?\\\]|\\\(.*?\\\))/g, (match) => {
		latexExpressions.push(match);
		return `<<LATEX_${latexExpressions.length - 1}>>`;
	});

	// Protect inline $...$ but NOT if it looks like money (e.g., $10, $3.99)
	content = protectLaTeXButNotMoney(content, latexExpressions);

	// Step 3: Escape standalone $ before digits (currency like $5 → \$5)
	// (Now that inline math is protected, this will only escape dollars not already protected)
	content = content.replace(/\$(?=\d)/g, '\\$');

	// Step 4: Restore protected LaTeX expressions (they are valid).
	// A placeholder-looking token with no stored entry is left untouched instead of
	// being replaced by the string "undefined".
	const restoreLatex = (text: string): string =>
		text.replace(
			/<<LATEX_(\d+)>>/g,
			(placeholder: string, index: string) =>
				latexExpressions[Number.parseInt(index, 10)] ?? placeholder
		);
	// Two passes: an inline `$...$` span protected second may wrap a placeholder created
	// by the first protection pass, and replace() does not rescan substituted text.
	content = restoreLatex(restoreLatex(content));

	// Step 5: Apply additional escaping functions (brackets and mhchem).
	// Code is still replaced by placeholders here, so none of this touches code content.
	content = escapeBrackets(content);
	if (content.includes('\\ce{') || content.includes('\\pu{')) {
		content = escapeMhchem(content);
	}

	// Final pass: Convert any remaining single-line \(...\) → $...$, \[...\] → $$...$$.
	// Replacer functions are used on purpose: in a replacement *string* every `$$`
	// collapses to one `$`, which would emit the literal text "$$1$" for display math.
	content = content
		.replace(/\\\((.+?)\\\)/g, (_match: string, inner: string) => '$' + inner + '$') // inline
		.replace(/\\\[(.+?)\\\]/g, (_match: string, inner: string) => '$$' + inner + '$$'); // display

	// Step 6: Restore code blocks (last, so they come back byte-for-byte)
	content = content.replace(
		/<<CODE_BLOCK_(\d+)>>/g,
		(placeholder: string, index: string) => codeBlocks[Number.parseInt(index, 10)] ?? placeholder
	);

	return content;
}
220+
221+
/**
 * Replaces inline `$...$` math spans with `<<LATEX_n>>` placeholders while leaving
 * dollar signs that look like currency amounts in place.
 *
 * Lines are scanned independently, so a span never crosses a newline. For each
 * candidate pair of `$` signs on a line:
 *  - if the character before the opening `$` is a letter, digit, `_`, `$` or `-`,
 *    the `$` is not treated as a math opener;
 *  - if the opening `$` is followed by a digit and the closing `$` is followed by a
 *    letter, digit, `_`, `$` or `-`, the opening `$` is treated as part of an amount;
 *  - otherwise the text from the opening to the closing `$` (inclusive) is stored.
 *
 * @param content - Text to scan.
 * @param latexExpressions - Receives every protected span (mutated in place); the
 *   number in a placeholder is the span's index in this array.
 * @returns `content` with protected spans replaced by placeholders; all other text,
 *   including everything after the last `$` on a line, is preserved.
 */
function protectLaTeXButNotMoney(content: string, latexExpressions: string[]): string {
	if (!content.includes('$')) {
		return content;
	}

	// Built once instead of on every loop iteration.
	const wordLikeChar = /[A-Za-z0-9_$-]/;
	const digitChar = /[0-9]/;

	return content
		.split('\n')
		.map((line) => {
			if (!line.includes('$')) {
				return line;
			}
			let result = '';
			let index = 0;
			// Scan until the whole line is consumed. The previous guard
			// (`index + 2 < line.length`) stopped early and silently dropped the
			// last one or two characters of the line (e.g. the "." in "$x$.").
			while (index < line.length) {
				const openIndex = line.indexOf('$', index);
				// Is there a next $-sign?
				const nextIndex = openIndex === -1 ? -1 : line.indexOf('$', openIndex + 1);
				if (nextIndex === -1) {
					// No complete pair left: keep the rest of the line verbatim.
					result += line.slice(index);
					break;
				}

				const beforeOpenChar = openIndex > 0 ? line[openIndex - 1] : '';
				const afterOpenChar = line[openIndex + 1];
				const afterCloseChar = nextIndex + 1 < line.length ? line[nextIndex + 1] : '';

				const glued = wordLikeChar.test(beforeOpenChar);
				const looksLikeAmount = digitChar.test(afterOpenChar) && wordLikeChar.test(afterCloseChar);
				if (glued || looksLikeAmount) {
					// Not TeX: emit up to and including this '$' and retry from the next char,
					// so the would-be closing '$' can still open a later span.
					result += line.slice(index, openIndex + 1);
					index = openIndex + 1;
					continue;
				}

				// Treat as LaTeX
				result += line.slice(index, openIndex);
				latexExpressions.push(line.slice(openIndex, nextIndex + 1));
				result += `<<LATEX_${latexExpressions.length - 1}>>`;
				index = nextIndex + 1;
			}
			return result;
		})
		.join('\n');
}
274+
275+
/**
 * Rewrites bracket-style math delimiters to dollar-style ones.
 *
 * `\[...\]` (may span lines) becomes `$$...$$` and `\(...\)` (single line) becomes
 * `$...$`. Fenced code blocks and inline code spans are matched first and returned
 * unchanged, so delimiters inside them are not rewritten.
 *
 * @param text - Text to convert.
 * @returns The converted text.
 */
function escapeBrackets(text: string): string {
	const pattern = /(```[\S\s]*?```|`.*?`)|\\\[([\S\s]*?[^\\])\\]|\\\((.*?)\\\)/g;

	// Exactly one of the three capture groups is defined for any given match.
	const rewrite = (whole: string, code?: string, display?: string, inline?: string): string => {
		if (code != null) {
			return code;
		}
		if (display != null) {
			return '$$' + display + '$$';
		}
		if (inline != null) {
			return '$' + inline + '$';
		}
		return whole;
	};

	return text.replace(pattern, rewrite);
}
296+
297+
/**
 * Doubles the backslash of mhchem commands that directly follow a `$`:
 * `$\ce{` becomes `$\\ce{` and `$\pu{` becomes `$\\pu{`.
 * Occurrences not immediately preceded by `$` are left unchanged.
 *
 * @param text - Text to process.
 * @returns The text with the matching command prefixes rewritten.
 */
function escapeMhchem(text: string): string {
	const ceEscaped = text.split('$\\ce{').join('$\\\\ce{');
	return ceEscaped.split('$\\pu{').join('$\\\\pu{');
}
301+
167302
async function processMarkdown(text: string): Promise<string> {
168303
try {
169-
const normalized = normalizeMathDelimiters(text);
170-
const result = await processor().process(normalized);
304+
// const normalized = normalizeMathDelimiters(text);
305+
// const result = await processor().process(normalized);
306+
const processedText = preprocessLaTeX(text);
307+
308+
const result = await processor().process(processedText);
171309
const html = String(result);
172310
const enhancedLinks = enhanceLinks(html);
173311

tools/server/webui/src/stories/fixtures/math-formulas.ts

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -150,6 +150,16 @@ $$\lim_{x \to 0} \frac{\sin x}{x} = 1$$
150150
151151
$$\lim_{n \to \infty} \left(1 + \frac{x}{n}\right)^n = e^x$$
152152
153+
## Further Bracket Styles
154+
155+
- \( \mathrm{GL}_2(\mathbb{F}_7) \): Group of invertible matrices with entries in \(\mathbb{F}_7\).
156+
- Some kernel of \(\mathrm{SL}_2(\mathbb{F}_7)\):
157+
\[
158+
\left\{ \begin{pmatrix} 1 & 0 \\ 0 & 1 \end{pmatrix}, \begin{pmatrix} -1 & 0 \\ 0 & -1 \end{pmatrix} \right\} = \{\pm I\}
159+
\]
160+
- $100 and $12.99 are amounts, not LaTeX.
161+
- I have $10, $3.99 and $x + y$ and $100x$. The amount is $2,000.
162+
153163
---
154164
155165
*This document showcases various mathematical notation and formulas that can be rendered in markdown using LaTeX syntax.*

0 commit comments

Comments
 (0)